2012-10-23 19:02:30 +02:00
|
|
|
;; Machine description for AArch64 AdvSIMD architecture.
|
2018-01-03 11:03:58 +01:00
|
|
|
;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
|
2012-10-23 19:02:30 +02:00
|
|
|
;; Contributed by ARM Ltd.
|
|
|
|
;;
|
|
|
|
;; This file is part of GCC.
|
|
|
|
;;
|
|
|
|
;; GCC is free software; you can redistribute it and/or modify it
|
|
|
|
;; under the terms of the GNU General Public License as published by
|
|
|
|
;; the Free Software Foundation; either version 3, or (at your option)
|
|
|
|
;; any later version.
|
|
|
|
;;
|
|
|
|
;; GCC is distributed in the hope that it will be useful, but
|
|
|
|
;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
;; General Public License for more details.
|
|
|
|
;;
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
|
|
|
;; along with GCC; see the file COPYING3. If not see
|
|
|
|
;; <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
(define_expand "mov<mode>"
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
[(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
|
|
|
|
(match_operand:VALL_F16 1 "general_operand" ""))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"
|
2017-09-13 18:49:42 +02:00
|
|
|
/* Force the operand into a register if it is not an
|
|
|
|
immediate whose use can be replaced with xzr.
|
|
|
|
If the mode is 16 bytes wide, then we will be doing
|
|
|
|
a stp in DI mode, so we check the validity of that.
|
|
|
|
If the mode is 8 bytes wide, then we will do doing a
|
|
|
|
normal str, so the check need not apply. */
|
|
|
|
if (GET_CODE (operands[0]) == MEM
|
|
|
|
&& !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
|
|
|
|
&& ((GET_MODE_SIZE (<MODE>mode) == 16
|
|
|
|
&& aarch64_mem_pair_operand (operands[0], DImode))
|
|
|
|
|| GET_MODE_SIZE (<MODE>mode) == 8)))
|
2012-10-23 19:02:30 +02:00
|
|
|
operands[1] = force_reg (<MODE>mode, operands[1]);
|
|
|
|
"
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "movmisalign<mode>"
|
2014-06-04 05:45:50 +02:00
|
|
|
[(set (match_operand:VALL 0 "nonimmediate_operand" "")
|
|
|
|
(match_operand:VALL 1 "general_operand" ""))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
/* This pattern is not permitted to fail during expansion: if both arguments
|
|
|
|
are non-registers (e.g. memory := constant, which can be created by the
|
|
|
|
auto-vectorizer), force operand 1 into a register. */
|
|
|
|
if (!register_operand (operands[0], <MODE>mode)
|
|
|
|
&& !register_operand (operands[1], <MODE>mode))
|
|
|
|
operands[1] = force_reg (<MODE>mode, operands[1]);
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_dup<mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
|
|
|
|
(vec_duplicate:VDQ_I
|
Improve dup pattern
Improve the dup pattern to prefer vector registers. When doing a dup
after a load, the register allocator thinks the costs are identical
and chooses an integer load. However a dup from an integer register
includes an int->fp transfer which is not modelled. Adding a '?' to
the integer variant means the cost is increased slightly so we prefer
using a vector register. This improves the following example:
#include <arm_neon.h>
void f(unsigned *a, uint32x4_t *b)
{
b[0] = vdupq_n_u32(a[1]);
b[1] = vdupq_n_u32(a[2]);
}
to:
ldr s0, [x0, 4]
dup v0.4s, v0.s[0]
str q0, [x1]
ldr s0, [x0, 8]
dup v0.4s, v0.s[0]
str q0, [x1, 16]
ret
gcc/
* config/aarch64/aarch64-simd.md (aarch64_simd_dup):
Swap alternatives, make integer dup more expensive.
From-SVN: r249443
2017-06-21 12:46:02 +02:00
|
|
|
(match_operand:<VEL> 1 "register_operand" "w,?r")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
[AArch64] Rewrite the vdup_lane intrinsics in C
gcc/
* config/aarch64/aarch64-simd-builtins.def
(dup_lane_scalar): Remove.
* config/aarch64/aarch64-simd.md
(aarch64_simd_dup): Add 'w->w' alternative.
(aarch64_dup_lane<mode>): Allow for VALL.
(aarch64_dup_lane_scalar<mode>): Remove.
(aarch64_dup_lane_<vswap_width_name><mode>): New.
(aarch64_get_lane_signed<mode>): Add w->w altenative.
(aarch64_get_lane_unsigned<mode>): Likewise.
(aarch64_get_lane<mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
* config/aarch64/iterators.md (VSWAP_WIDTH): New.
(VCON): Change container of V2SF.
(vswap_width_name): Likewise.
* config/aarch64/arm_neon.h
(__aarch64_vdup_lane_any): New.
(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
gcc/testsuite/
* gcc.target/aarch64/scalar_intrinsics.c
(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.
From-SVN: r202180
2013-09-02 18:22:10 +02:00
|
|
|
"@
|
Improve dup pattern
Improve the dup pattern to prefer vector registers. When doing a dup
after a load, the register allocator thinks the costs are identical
and chooses an integer load. However a dup from an integer register
includes an int->fp transfer which is not modelled. Adding a '?' to
the integer variant means the cost is increased slightly so we prefer
using a vector register. This improves the following example:
#include <arm_neon.h>
void f(unsigned *a, uint32x4_t *b)
{
b[0] = vdupq_n_u32(a[1]);
b[1] = vdupq_n_u32(a[2]);
}
to:
ldr s0, [x0, 4]
dup v0.4s, v0.s[0]
str q0, [x1]
ldr s0, [x0, 8]
dup v0.4s, v0.s[0]
str q0, [x1, 16]
ret
gcc/
* config/aarch64/aarch64-simd.md (aarch64_simd_dup):
Swap alternatives, make integer dup more expensive.
From-SVN: r249443
2017-06-21 12:46:02 +02:00
|
|
|
dup\\t%0.<Vtype>, %1.<Vetype>[0]
|
|
|
|
dup\\t%0.<Vtype>, %<vw>1"
|
|
|
|
[(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
|
[AArch64] Rewrite the vdup_lane intrinsics in C
gcc/
* config/aarch64/aarch64-simd-builtins.def
(dup_lane_scalar): Remove.
* config/aarch64/aarch64-simd.md
(aarch64_simd_dup): Add 'w->w' alternative.
(aarch64_dup_lane<mode>): Allow for VALL.
(aarch64_dup_lane_scalar<mode>): Remove.
(aarch64_dup_lane_<vswap_width_name><mode>): New.
(aarch64_get_lane_signed<mode>): Add w->w altenative.
(aarch64_get_lane_unsigned<mode>): Likewise.
(aarch64_get_lane<mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
* config/aarch64/iterators.md (VSWAP_WIDTH): New.
(VCON): Change container of V2SF.
(vswap_width_name): Likewise.
* config/aarch64/arm_neon.h
(__aarch64_vdup_lane_any): New.
(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
gcc/testsuite/
* gcc.target/aarch64/scalar_intrinsics.c
(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.
From-SVN: r202180
2013-09-02 18:22:10 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_dup<mode>"
|
2015-09-08 21:18:29 +02:00
|
|
|
[(set (match_operand:VDQF_F16 0 "register_operand" "=w")
|
|
|
|
(vec_duplicate:VDQF_F16
|
|
|
|
(match_operand:<VEL> 1 "register_operand" "w")))]
|
[AArch64] Rewrite the vdup_lane intrinsics in C
gcc/
* config/aarch64/aarch64-simd-builtins.def
(dup_lane_scalar): Remove.
* config/aarch64/aarch64-simd.md
(aarch64_simd_dup): Add 'w->w' alternative.
(aarch64_dup_lane<mode>): Allow for VALL.
(aarch64_dup_lane_scalar<mode>): Remove.
(aarch64_dup_lane_<vswap_width_name><mode>): New.
(aarch64_get_lane_signed<mode>): Add w->w altenative.
(aarch64_get_lane_unsigned<mode>): Likewise.
(aarch64_get_lane<mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
* config/aarch64/iterators.md (VSWAP_WIDTH): New.
(VCON): Change container of V2SF.
(vswap_width_name): Likewise.
* config/aarch64/arm_neon.h
(__aarch64_vdup_lane_any): New.
(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
gcc/testsuite/
* gcc.target/aarch64/scalar_intrinsics.c
(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.
From-SVN: r202180
2013-09-02 18:22:10 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"dup\\t%0.<Vtype>, %1.<Vetype>[0]"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_dup<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_dup_lane<mode>"
|
2015-09-08 21:18:29 +02:00
|
|
|
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
|
|
|
(vec_duplicate:VALL_F16
|
2012-10-23 19:02:30 +02:00
|
|
|
(vec_select:<VEL>
|
2015-09-08 21:18:29 +02:00
|
|
|
(match_operand:VALL_F16 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
|
|
|
|
)))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_dup<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
[AArch64] Rewrite the vdup_lane intrinsics in C
gcc/
* config/aarch64/aarch64-simd-builtins.def
(dup_lane_scalar): Remove.
* config/aarch64/aarch64-simd.md
(aarch64_simd_dup): Add 'w->w' alternative.
(aarch64_dup_lane<mode>): Allow for VALL.
(aarch64_dup_lane_scalar<mode>): Remove.
(aarch64_dup_lane_<vswap_width_name><mode>): New.
(aarch64_get_lane_signed<mode>): Add w->w altenative.
(aarch64_get_lane_unsigned<mode>): Likewise.
(aarch64_get_lane<mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
* config/aarch64/iterators.md (VSWAP_WIDTH): New.
(VCON): Change container of V2SF.
(vswap_width_name): Likewise.
* config/aarch64/arm_neon.h
(__aarch64_vdup_lane_any): New.
(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
gcc/testsuite/
* gcc.target/aarch64/scalar_intrinsics.c
(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.
From-SVN: r202180
2013-09-02 18:22:10 +02:00
|
|
|
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
|
2017-03-09 11:34:36 +01:00
|
|
|
[(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
|
|
|
|
(vec_duplicate:VALL_F16_NO_V2Q
|
[AArch64] Rewrite the vdup_lane intrinsics in C
gcc/
* config/aarch64/aarch64-simd-builtins.def
(dup_lane_scalar): Remove.
* config/aarch64/aarch64-simd.md
(aarch64_simd_dup): Add 'w->w' alternative.
(aarch64_dup_lane<mode>): Allow for VALL.
(aarch64_dup_lane_scalar<mode>): Remove.
(aarch64_dup_lane_<vswap_width_name><mode>): New.
(aarch64_get_lane_signed<mode>): Add w->w altenative.
(aarch64_get_lane_unsigned<mode>): Likewise.
(aarch64_get_lane<mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
* config/aarch64/iterators.md (VSWAP_WIDTH): New.
(VCON): Change container of V2SF.
(vswap_width_name): Likewise.
* config/aarch64/arm_neon.h
(__aarch64_vdup_lane_any): New.
(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
gcc/testsuite/
* gcc.target/aarch64/scalar_intrinsics.c
(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.
From-SVN: r202180
2013-09-02 18:22:10 +02:00
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
|
|
|
|
)))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_dup<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2017-10-30 19:35:32 +01:00
|
|
|
(define_insn "*aarch64_simd_mov<VD:mode>"
|
2014-06-04 05:45:50 +02:00
|
|
|
[(set (match_operand:VD 0 "nonimmediate_operand"
|
2017-08-17 14:54:10 +02:00
|
|
|
"=w, m, m, w, ?r, ?w, ?r, w")
|
2014-06-04 05:45:50 +02:00
|
|
|
(match_operand:VD 1 "general_operand"
|
2017-08-17 14:54:10 +02:00
|
|
|
"m, Dz, w, w, w, r, r, Dn"))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD
|
|
|
|
&& (register_operand (operands[0], <MODE>mode)
|
2017-08-17 14:54:10 +02:00
|
|
|
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
|
2012-10-23 19:02:30 +02:00
|
|
|
{
|
|
|
|
switch (which_alternative)
|
|
|
|
{
|
2017-08-17 14:54:10 +02:00
|
|
|
case 0: return "ldr\t%d0, %1";
|
|
|
|
case 1: return "str\txzr, %0";
|
|
|
|
case 2: return "str\t%d1, %0";
|
|
|
|
case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
|
|
|
|
case 4: return "umov\t%0, %1.d[0]";
|
|
|
|
case 5: return "fmov\t%d0, %1";
|
|
|
|
case 6: return "mov\t%0, %1";
|
|
|
|
case 7:
|
2018-01-03 22:43:44 +01:00
|
|
|
return aarch64_output_simd_mov_immediate (operands[1], 64);
|
2012-10-23 19:02:30 +02:00
|
|
|
default: gcc_unreachable ();
|
|
|
|
}
|
|
|
|
}
|
2017-10-30 19:35:32 +01:00
|
|
|
[(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
|
2017-03-16 12:41:24 +01:00
|
|
|
neon_logic<q>, neon_to_gp<q>, f_mcr,\
|
|
|
|
mov_reg, neon_move<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2017-10-30 19:35:32 +01:00
|
|
|
(define_insn "*aarch64_simd_mov<VQ:mode>"
|
2014-06-04 05:45:50 +02:00
|
|
|
[(set (match_operand:VQ 0 "nonimmediate_operand"
|
2017-09-13 18:49:42 +02:00
|
|
|
"=w, Umq, m, w, ?r, ?w, ?r, w")
|
2014-06-04 05:45:50 +02:00
|
|
|
(match_operand:VQ 1 "general_operand"
|
2017-08-17 14:54:10 +02:00
|
|
|
"m, Dz, w, w, w, r, r, Dn"))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD
|
|
|
|
&& (register_operand (operands[0], <MODE>mode)
|
2017-08-17 14:54:10 +02:00
|
|
|
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
|
2012-10-23 19:02:30 +02:00
|
|
|
{
|
2013-01-07 16:22:06 +01:00
|
|
|
switch (which_alternative)
|
|
|
|
{
|
2013-05-13 16:22:02 +02:00
|
|
|
case 0:
|
2017-08-17 14:54:10 +02:00
|
|
|
return "ldr\t%q0, %1";
|
2013-05-13 16:22:02 +02:00
|
|
|
case 1:
|
2017-08-17 14:54:10 +02:00
|
|
|
return "stp\txzr, xzr, %0";
|
2013-05-13 16:22:02 +02:00
|
|
|
case 2:
|
2017-08-17 14:54:10 +02:00
|
|
|
return "str\t%q1, %0";
|
2013-05-13 16:22:02 +02:00
|
|
|
case 3:
|
2017-08-17 14:54:10 +02:00
|
|
|
return "mov\t%0.<Vbtype>, %1.<Vbtype>";
|
2013-05-13 16:22:02 +02:00
|
|
|
case 4:
|
|
|
|
case 5:
|
2013-01-07 16:22:06 +01:00
|
|
|
case 6:
|
2017-08-17 14:54:10 +02:00
|
|
|
return "#";
|
|
|
|
case 7:
|
2018-01-03 22:43:44 +01:00
|
|
|
return aarch64_output_simd_mov_immediate (operands[1], 128);
|
2013-05-13 16:22:02 +02:00
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
2013-01-07 16:22:06 +01:00
|
|
|
}
|
2012-10-23 19:02:30 +02:00
|
|
|
}
|
2017-10-30 19:35:32 +01:00
|
|
|
[(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
|
|
|
|
neon_logic<q>, multiple, multiple,\
|
2017-08-17 14:54:10 +02:00
|
|
|
multiple, neon_move<q>")
|
|
|
|
(set_attr "length" "4,4,4,4,8,8,8,4")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2017-06-05 10:52:02 +02:00
|
|
|
;; When storing lane zero we can use the normal STR and its more permissive
|
|
|
|
;; addressing modes.
|
|
|
|
|
|
|
|
(define_insn "aarch64_store_lane0<mode>"
|
|
|
|
[(set (match_operand:<VEL> 0 "memory_operand" "=m")
|
|
|
|
(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
|
|
|
|
(parallel [(match_operand 2 "const_int_operand" "n")])))]
|
|
|
|
"TARGET_SIMD
|
2017-11-13 09:21:16 +01:00
|
|
|
&& ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
|
2017-06-05 10:52:02 +02:00
|
|
|
"str\\t%<Vetype>1, %0"
|
|
|
|
[(set_attr "type" "neon_store1_1reg<q>")]
|
|
|
|
)
|
|
|
|
|
2015-10-20 19:18:24 +02:00
|
|
|
(define_insn "load_pair<mode>"
|
|
|
|
[(set (match_operand:VD 0 "register_operand" "=w")
|
|
|
|
(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
|
|
|
|
(set (match_operand:VD 2 "register_operand" "=w")
|
|
|
|
(match_operand:VD 3 "memory_operand" "m"))]
|
|
|
|
"TARGET_SIMD
|
|
|
|
&& rtx_equal_p (XEXP (operands[3], 0),
|
|
|
|
plus_constant (Pmode,
|
|
|
|
XEXP (operands[1], 0),
|
|
|
|
GET_MODE_SIZE (<MODE>mode)))"
|
|
|
|
"ldp\\t%d0, %d2, %1"
|
|
|
|
[(set_attr "type" "neon_ldp")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "store_pair<mode>"
|
|
|
|
[(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
|
|
|
|
(match_operand:VD 1 "register_operand" "w"))
|
|
|
|
(set (match_operand:VD 2 "memory_operand" "=m")
|
|
|
|
(match_operand:VD 3 "register_operand" "w"))]
|
|
|
|
"TARGET_SIMD
|
|
|
|
&& rtx_equal_p (XEXP (operands[2], 0),
|
|
|
|
plus_constant (Pmode,
|
|
|
|
XEXP (operands[0], 0),
|
|
|
|
GET_MODE_SIZE (<MODE>mode)))"
|
|
|
|
"stp\\t%d1, %d3, %0"
|
|
|
|
[(set_attr "type" "neon_stp")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_split
|
|
|
|
[(set (match_operand:VQ 0 "register_operand" "")
|
|
|
|
(match_operand:VQ 1 "register_operand" ""))]
|
|
|
|
"TARGET_SIMD && reload_completed
|
|
|
|
&& GP_REGNUM_P (REGNO (operands[0]))
|
|
|
|
&& GP_REGNUM_P (REGNO (operands[1]))"
|
2015-01-21 18:53:31 +01:00
|
|
|
[(const_int 0)]
|
2012-10-23 19:02:30 +02:00
|
|
|
{
|
2015-01-21 18:53:31 +01:00
|
|
|
aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
|
|
|
|
DONE;
|
2012-10-23 19:02:30 +02:00
|
|
|
})
|
|
|
|
|
2013-05-07 14:47:18 +02:00
|
|
|
(define_split
|
|
|
|
[(set (match_operand:VQ 0 "register_operand" "")
|
|
|
|
(match_operand:VQ 1 "register_operand" ""))]
|
|
|
|
"TARGET_SIMD && reload_completed
|
|
|
|
&& ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
|
|
|
|
|| (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
|
|
|
|
[(const_int 0)]
|
|
|
|
{
|
|
|
|
aarch64_split_simd_move (operands[0], operands[1]);
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2013-05-13 16:22:02 +02:00
|
|
|
(define_expand "aarch64_split_simd_mov<mode>"
|
2013-05-07 14:47:18 +02:00
|
|
|
[(set (match_operand:VQ 0)
|
|
|
|
(match_operand:VQ 1))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
rtx dst = operands[0];
|
|
|
|
rtx src = operands[1];
|
|
|
|
|
|
|
|
if (GP_REGNUM_P (REGNO (src)))
|
|
|
|
{
|
2013-05-13 16:22:02 +02:00
|
|
|
rtx src_low_part = gen_lowpart (<VHALF>mode, src);
|
|
|
|
rtx src_high_part = gen_highpart (<VHALF>mode, src);
|
2013-05-07 14:47:18 +02:00
|
|
|
|
|
|
|
emit_insn
|
2013-05-13 16:22:02 +02:00
|
|
|
(gen_move_lo_quad_<mode> (dst, src_low_part));
|
2013-05-07 14:47:18 +02:00
|
|
|
emit_insn
|
2013-05-13 16:22:02 +02:00
|
|
|
(gen_move_hi_quad_<mode> (dst, src_high_part));
|
2013-05-07 14:47:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
else
|
|
|
|
{
|
2013-05-13 16:22:02 +02:00
|
|
|
rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
|
|
|
|
rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
|
|
|
|
rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2013-05-07 14:47:18 +02:00
|
|
|
|
|
|
|
emit_insn
|
2013-05-13 16:22:02 +02:00
|
|
|
(gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
|
2013-05-07 14:47:18 +02:00
|
|
|
emit_insn
|
2013-05-13 16:22:02 +02:00
|
|
|
(gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
|
2013-05-07 14:47:18 +02:00
|
|
|
}
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_mov_from_<mode>low"
|
|
|
|
[(set (match_operand:<VHALF> 0 "register_operand" "=r")
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
|
|
|
|
"TARGET_SIMD && reload_completed"
|
|
|
|
"umov\t%0, %1.d[0]"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_to_gp<q>")
|
2013-05-07 14:47:18 +02:00
|
|
|
(set_attr "length" "4")
|
|
|
|
])
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_mov_from_<mode>high"
|
|
|
|
[(set (match_operand:<VHALF> 0 "register_operand" "=r")
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
|
|
|
|
"TARGET_SIMD && reload_completed"
|
|
|
|
"umov\t%0, %1.d[1]"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_to_gp<q>")
|
2013-05-07 14:47:18 +02:00
|
|
|
(set_attr "length" "4")
|
|
|
|
])
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "orn<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_logic<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "bic<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_logic<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "add<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_add<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "sub<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sub<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "mul<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_BHSI 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mul_<Vetype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2014-12-05 20:44:47 +01:00
|
|
|
(define_insn "bswap<mode>2"
|
2014-04-24 10:05:07 +02:00
|
|
|
[(set (match_operand:VDQHSD 0 "register_operand" "=w")
|
|
|
|
(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
|
|
|
|
[(set_attr "type" "neon_rev<q>")]
|
|
|
|
)
|
|
|
|
|
2014-09-05 11:58:21 +02:00
|
|
|
(define_insn "aarch64_rbit<mode>"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
|
|
|
|
UNSPEC_RBIT))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"rbit\\t%0.<Vbtype>, %1.<Vbtype>"
|
|
|
|
[(set_attr "type" "neon_rbit")]
|
|
|
|
)
|
|
|
|
|
2014-11-21 17:56:21 +01:00
|
|
|
(define_expand "ctz<mode>2"
|
|
|
|
[(set (match_operand:VS 0 "register_operand")
|
|
|
|
(ctz:VS (match_operand:VS 1 "register_operand")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2014-12-05 20:44:47 +01:00
|
|
|
emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
|
2014-11-21 17:56:21 +01:00
|
|
|
rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
|
|
|
|
<MODE>mode, 0);
|
|
|
|
emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
|
|
|
|
emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2017-08-08 15:17:41 +02:00
|
|
|
(define_expand "xorsign<mode>3"
|
|
|
|
[(match_operand:VHSDF 0 "register_operand")
|
|
|
|
(match_operand:VHSDF 1 "register_operand")
|
|
|
|
(match_operand:VHSDF 2 "register_operand")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
|
2017-08-31 11:52:38 +02:00
|
|
|
machine_mode imode = <V_INT_EQUIV>mode;
|
2017-08-08 15:17:41 +02:00
|
|
|
rtx v_bitmask = gen_reg_rtx (imode);
|
|
|
|
rtx op1x = gen_reg_rtx (imode);
|
|
|
|
rtx op2x = gen_reg_rtx (imode);
|
|
|
|
|
|
|
|
rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
|
|
|
|
rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
|
|
|
|
|
|
|
|
int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
|
|
|
|
|
|
|
|
emit_move_insn (v_bitmask,
|
2017-08-31 11:52:38 +02:00
|
|
|
aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
|
2017-08-08 15:17:41 +02:00
|
|
|
HOST_WIDE_INT_M1U << bits));
|
|
|
|
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
|
|
|
|
emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
|
2017-08-08 15:17:41 +02:00
|
|
|
emit_move_insn (operands[0],
|
|
|
|
lowpart_subreg (<MODE>mode, op1x, imode));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2017-10-16 11:56:41 +02:00
|
|
|
;; These instructions map to the __builtins for the Dot Product operations.
|
|
|
|
(define_insn "aarch64_<sur>dot<vsi2qi>"
|
|
|
|
[(set (match_operand:VS 0 "register_operand" "=w")
|
|
|
|
(plus:VS (match_operand:VS 1 "register_operand" "0")
|
|
|
|
(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
|
|
|
|
(match_operand:<VSI2QI> 3 "register_operand" "w")]
|
|
|
|
DOTPROD)))]
|
|
|
|
"TARGET_DOTPROD"
|
|
|
|
"<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
|
|
|
|
[(set_attr "type" "neon_dot")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; These expands map to the Dot Product optab the vectorizer checks for.
|
|
|
|
;; The auto-vectorizer expects a dot product builtin that also does an
|
|
|
|
;; accumulation into the provided register.
|
|
|
|
;; Given the following pattern
|
|
|
|
;;
|
|
|
|
;; for (i=0; i<len; i++) {
|
|
|
|
;; c = a[i] * b[i];
|
|
|
|
;; r += c;
|
|
|
|
;; }
|
|
|
|
;; return result;
|
|
|
|
;;
|
|
|
|
;; This can be auto-vectorized to
|
|
|
|
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
|
|
|
|
;;
|
|
|
|
;; given enough iterations. However the vectorizer can keep unrolling the loop
|
|
|
|
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
|
|
|
|
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
|
|
|
|
;; ...
|
|
|
|
;;
|
|
|
|
;; and so the vectorizer provides r, in which the result has to be accumulated.
|
|
|
|
(define_expand "<sur>dot_prod<vsi2qi>"
|
|
|
|
[(set (match_operand:VS 0 "register_operand")
|
|
|
|
(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
|
|
|
|
(match_operand:<VSI2QI> 2 "register_operand")]
|
|
|
|
DOTPROD)
|
|
|
|
(match_operand:VS 3 "register_operand")))]
|
|
|
|
"TARGET_DOTPROD"
|
|
|
|
{
|
|
|
|
emit_insn (
|
|
|
|
gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
|
|
|
|
operands[2]));
|
|
|
|
emit_insn (gen_rtx_SET (operands[0], operands[3]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; These instructions map to the __builtins for the Dot Product
|
|
|
|
;; indexed operations.
|
|
|
|
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
|
|
|
|
[(set (match_operand:VS 0 "register_operand" "=w")
|
|
|
|
(plus:VS (match_operand:VS 1 "register_operand" "0")
|
|
|
|
(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
|
|
|
|
(match_operand:V8QI 3 "register_operand" "<h_con>")
|
|
|
|
(match_operand:SI 4 "immediate_operand" "i")]
|
|
|
|
DOTPROD)))]
|
|
|
|
"TARGET_DOTPROD"
|
|
|
|
{
|
2017-11-13 09:21:16 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
|
2017-10-16 11:56:41 +02:00
|
|
|
return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_dot")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
|
|
|
|
[(set (match_operand:VS 0 "register_operand" "=w")
|
|
|
|
(plus:VS (match_operand:VS 1 "register_operand" "0")
|
|
|
|
(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
|
|
|
|
(match_operand:V16QI 3 "register_operand" "<h_con>")
|
|
|
|
(match_operand:SI 4 "immediate_operand" "i")]
|
|
|
|
DOTPROD)))]
|
|
|
|
"TARGET_DOTPROD"
|
|
|
|
{
|
2017-11-13 09:21:16 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
|
2017-10-16 11:56:41 +02:00
|
|
|
return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_dot")]
|
|
|
|
)
|
|
|
|
|
2017-01-19 19:30:44 +01:00
|
|
|
(define_expand "copysign<mode>3"
|
|
|
|
[(match_operand:VHSDF 0 "register_operand")
|
|
|
|
(match_operand:VHSDF 1 "register_operand")
|
|
|
|
(match_operand:VHSDF 2 "register_operand")]
|
|
|
|
"TARGET_FLOAT && TARGET_SIMD"
|
|
|
|
{
|
2017-08-31 11:52:38 +02:00
|
|
|
rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
|
2017-01-19 19:30:44 +01:00
|
|
|
int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
|
|
|
|
|
|
|
|
emit_move_insn (v_bitmask,
|
2017-08-31 11:52:38 +02:00
|
|
|
aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
|
2017-01-19 19:30:44 +01:00
|
|
|
HOST_WIDE_INT_M1U << bits));
|
|
|
|
emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
|
|
|
|
operands[2], operands[1]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2013-09-16 11:50:21 +02:00
|
|
|
(define_insn "*aarch64_mul3_elt<mode>"
|
|
|
|
[(set (match_operand:VMUL 0 "register_operand" "=w")
|
|
|
|
(mult:VMUL
|
|
|
|
(vec_duplicate:VMUL
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:VMUL 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VMUL 3 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
|
|
|
|
}
|
[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
gcc/
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
(*aarch64_mul3_elt_from_dup<mode>): Likewise.
(*aarch64_fma4_elt_from_dup<mode>): Likewise.
(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
* config/aarch64/iterators.md (VMUL): Supprt half precision float modes.
(f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
From-SVN: r238719
2016-07-25 16:49:57 +02:00
|
|
|
[(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
|
2013-09-16 11:50:21 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
|
|
|
|
[(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
|
|
|
|
(mult:VMUL_CHANGE_NLANES
|
|
|
|
(vec_duplicate:VMUL_CHANGE_NLANES
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
|
2013-09-16 11:50:21 +02:00
|
|
|
)
|
|
|
|
|
2016-05-17 18:37:19 +02:00
|
|
|
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
|
|
|
|
[(set (match_operand:VMUL 0 "register_operand" "=w")
|
|
|
|
(mult:VMUL
|
|
|
|
(vec_duplicate:VMUL
|
|
|
|
(match_operand:<VEL> 1 "register_operand" "<h_con>"))
|
|
|
|
(match_operand:VMUL 2 "register_operand" "w")))]
|
2013-09-16 11:50:21 +02:00
|
|
|
"TARGET_SIMD"
|
2016-05-17 18:37:19 +02:00
|
|
|
"<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
|
[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
gcc/
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
(*aarch64_mul3_elt_from_dup<mode>): Likewise.
(*aarch64_fma4_elt_from_dup<mode>): Likewise.
(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
* config/aarch64/iterators.md (VMUL): Supprt half precision float modes.
(f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
From-SVN: r238719
2016-07-25 16:49:57 +02:00
|
|
|
[(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
|
2013-09-16 11:50:21 +02:00
|
|
|
)
|
|
|
|
|
2016-06-08 12:12:53 +02:00
|
|
|
(define_insn "aarch64_rsqrte<mode>"
|
[AArch64][7/10] ARMv8.2-A FP16 one operand scalar intrinsics
gcc/
* config.gcc (aarch64*-*-*): Install arm_fp16.h.
* config/aarch64/aarch64-builtins.c (hi_UP): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_frsqrte<mode>): Extend to HF
mode.
(aarch64_frecp<FRECP:frecp_suffix><mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.md (<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(abs<mode>2): Likewise.
(<optab><mode>hf2): New pattern for HF mode.
(<optab>hihf2): Likewise.
* config/aarch64/arm_neon.h: Include arm_fp16.h.
* config/aarch64/iterators.md (GPF_F16, GPI_F16, VHSDF_HSDF): New.
(w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE):
Support HF mode.
* config/aarch64/arm_fp16.h: New file.
(vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16,
vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16,
vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16,
vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16,
vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16,
vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16,
vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16,
vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16,
vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16,
vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16,
vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16,
vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16,
vsqrth_f16): New.
From-SVN: r238722
2016-07-25 18:00:28 +02:00
|
|
|
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
|
2015-11-06 18:10:17 +01:00
|
|
|
UNSPEC_RSQRTE))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
|
2015-11-06 18:10:17 +01:00
|
|
|
|
2016-06-08 12:14:30 +02:00
|
|
|
(define_insn "aarch64_rsqrts<mode>"
|
[AArch64][8/10] ARMv8.2-A FP16 two operands scalar intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3):
New.
(<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise.
(add<mode>3): Likewise.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
(<fmaxmin><mode>3): Extend to HF.
* config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_fac<optab><mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn>hfhi3): New.
(<FCVT_FIXED2F:fcvt_fixed_insn>hihf3): Likewise.
* config/aarch64/iterators.md (VHSDF_SDF): Delete.
(VSDQ_HSDI): Support HI.
(fcvt_target, FCVT_TARGET): Likewise.
* config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16,
vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16,
vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32,
vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64,
vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16,
vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16,
vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16,
vrsqrtsh_f16): New.
From-SVN: r238723
2016-07-25 18:10:52 +02:00
|
|
|
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
UNSPEC_RSQRTS))]
|
2015-11-06 18:10:17 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
|
2015-11-06 18:10:17 +01:00
|
|
|
|
Add an rsqrt_optab and IFN_RSQRT internal function
All current uses of builtin_reciprocal convert 1.0/sqrt into rsqrt.
This patch adds an rsqrt optab and associated internal function for
that instead. We can then pick up the vector forms of rsqrt automatically,
fixing an AArch64 regression from my internal_fn patches.
With that change, builtin_reciprocal only needs to handle target-specific
built-in functions. I've restricted the hook to those since, if we need
a reciprocal of another standard function later, I think there should be
a strong preference for adding a new optab and internal function for it,
rather than hiding the code in a backend.
Three targets implement builtin_reciprocal: aarch64, i386 and rs6000.
i386 and rs6000 already used the obvious rsqrt<mode>2 pattern names
for the instructions, so they pick up the new code automatically.
aarch64 needs a slight rename.
mn10300 is unusual in that its native operation is rsqrt, and
sqrt is approximated as 1.0/rsqrt. The port also uses rsqrt<mode>2
for the rsqrt pattern, so after the patch we now pick it up as a native
operation.
Two other ports define rsqrt patterns: sh and v850. AFAICT these
patterns aren't currently used, but I think the patch does what the
authors of the patterns would have expected. There's obviously some
risk of fallout though.
Tested on x86_64-linux-gnu, aarch64-linux-gnu, arm-linux-gnueabihf
(as a target without the hooks) and powerpc64-linux-gnu.
gcc/
* internal-fn.def (RSQRT): New function.
* optabs.def (rsqrt_optab): New optab.
* doc/md.texi (rsqrtM2): Document.
* target.def (builtin_reciprocal): Replace gcall argument with
a function decl. Restrict hook to machine functions.
* doc/tm.texi: Regenerate.
* targhooks.h (default_builtin_reciprocal): Update prototype.
* targhooks.c (default_builtin_reciprocal): Likewise.
* tree-ssa-math-opts.c: Include internal-fn.h.
(internal_fn_reciprocal): New function.
(pass_cse_reciprocals::execute): Call it, and build a call to an
internal function on success. Only call targetm.builtin_reciprocal
for machine functions.
* config/aarch64/aarch64-protos.h (aarch64_builtin_rsqrt): Remove
second argument.
* config/aarch64/aarch64-builtins.c (aarch64_expand_builtin_rsqrt):
Rename aarch64_rsqrt_<mode>2 to rsqrt<mode>2.
(aarch64_builtin_rsqrt): Remove md_fn argument and only handle
machine functions.
* config/aarch64/aarch64.c (use_rsqrt_p): New function.
(aarch64_builtin_reciprocal): Replace gcall argument with a
function decl. Use use_rsqrt_p. Remove optimize_size check.
Only handle machine functions. Update call to aarch64_builtin_rsqrt.
(aarch64_optab_supported_p): New function.
(TARGET_OPTAB_SUPPORTED_P): Define.
* config/aarch64/aarch64-simd.md (aarch64_rsqrt_<mode>2): Rename to...
(rsqrt<mode>2): ...this.
* config/i386/i386.c (use_rsqrt_p): New function.
(ix86_builtin_reciprocal): Replace gcall argument with a
function decl. Use use_rsqrt_p. Remove optimize_insn_for_size_p
check. Only handle machine functions.
(ix86_optab_supported_p): Handle rsqrt_optab.
* config/rs6000/rs6000.c (TARGET_OPTAB_SUPPORTED_P): Define.
(rs6000_builtin_reciprocal): Replace gcall argument with a
function decl. Remove optimize_insn_for_size_p check.
Only handle machine functions.
(rs6000_optab_supported_p): New function.
From-SVN: r231229
2015-12-03 15:31:55 +01:00
|
|
|
(define_expand "rsqrt<mode>2"
|
2015-11-06 18:10:17 +01:00
|
|
|
[(set (match_operand:VALLF 0 "register_operand" "=w")
|
|
|
|
(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
|
|
|
|
UNSPEC_RSQRT))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2016-06-13 21:02:56 +02:00
|
|
|
aarch64_emit_approx_sqrt (operands[0], operands[1], true);
|
2015-11-06 18:10:17 +01:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2013-09-16 11:50:21 +02:00
|
|
|
(define_insn "*aarch64_mul3_elt_to_64v2df"
|
|
|
|
[(set (match_operand:DF 0 "register_operand" "=w")
|
|
|
|
(mult:DF
|
|
|
|
(vec_select:DF
|
|
|
|
(match_operand:V2DF 1 "register_operand" "w")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")]))
|
|
|
|
(match_operand:DF 3 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_mul_d_scalar_q")]
|
2013-09-16 11:50:21 +02:00
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "neg<mode>2"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"neg\t%0.<Vtype>, %1.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_neg<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "abs<mode>2"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"abs\t%0.<Vtype>, %1.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_abs<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2015-01-28 11:08:57 +01:00
|
|
|
;; The intrinsic version of integer ABS must not be allowed to
|
|
|
|
;; combine with any operation with an integerated ABS step, such
|
|
|
|
;; as SABD.
|
|
|
|
(define_insn "aarch64_abs<mode>"
|
|
|
|
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_I_DI
|
|
|
|
[(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
|
|
|
|
UNSPEC_ABS))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
|
|
|
|
[(set_attr "type" "neon_abs<q>")]
|
|
|
|
)
|
|
|
|
|
2013-03-21 08:47:12 +01:00
|
|
|
(define_insn "abd<mode>_3"
|
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(abs:VDQ_BHSI (minus:VDQ_BHSI
|
|
|
|
(match_operand:VDQ_BHSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_abd<q>")]
|
2013-03-21 08:47:12 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aba<mode>_3"
|
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
|
|
|
|
(match_operand:VDQ_BHSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_BHSI 2 "register_operand" "w")))
|
|
|
|
(match_operand:VDQ_BHSI 3 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_arith_acc<q>")]
|
2013-03-21 08:47:12 +01:00
|
|
|
)
|
|
|
|
|
2016-06-08 12:16:07 +02:00
|
|
|
(define_insn "fabd<mode>3"
|
[AArch64][8/10] ARMv8.2-A FP16 two operands scalar intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3):
New.
(<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise.
(add<mode>3): Likewise.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
(<fmaxmin><mode>3): Extend to HF.
* config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_fac<optab><mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn>hfhi3): New.
(<FCVT_FIXED2F:fcvt_fixed_insn>hihf3): Likewise.
* config/aarch64/iterators.md (VHSDF_SDF): Delete.
(VSDQ_HSDI): Support HI.
(fcvt_target, FCVT_TARGET): Likewise.
* config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16,
vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16,
vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32,
vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64,
vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16,
vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16,
vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16,
vrsqrtsh_f16): New.
From-SVN: r238723
2016-07-25 18:10:52 +02:00
|
|
|
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
|
|
|
|
(abs:VHSDF_HSDF
|
|
|
|
(minus:VHSDF_HSDF
|
|
|
|
(match_operand:VHSDF_HSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
|
2013-05-03 12:17:57 +02:00
|
|
|
"TARGET_SIMD"
|
2016-06-08 12:16:07 +02:00
|
|
|
"fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_abd_<stype><q>")]
|
2013-05-03 12:17:57 +02:00
|
|
|
)
|
|
|
|
|
2017-10-04 18:59:40 +02:00
|
|
|
;; For AND (vector, register) and BIC (vector, immediate)
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "and<mode>3"
|
2017-10-04 18:59:40 +02:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
|
|
|
|
(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
|
|
|
|
(match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2017-10-04 18:59:40 +02:00
|
|
|
{
|
|
|
|
switch (which_alternative)
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
|
|
|
|
case 1:
|
2018-01-03 22:43:44 +01:00
|
|
|
return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
|
|
|
|
AARCH64_CHECK_BIC);
|
2017-10-04 18:59:40 +02:00
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
|
|
|
}
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_logic<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2017-10-04 18:59:40 +02:00
|
|
|
;; For ORR (vector, register) and ORR (vector, immediate)
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "ior<mode>3"
|
2017-10-04 18:59:40 +02:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
|
|
|
|
(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
|
|
|
|
(match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2017-10-04 18:59:40 +02:00
|
|
|
{
|
|
|
|
switch (which_alternative)
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
|
|
|
|
case 1:
|
2018-01-03 22:43:44 +01:00
|
|
|
return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
|
|
|
|
AARCH64_CHECK_ORR);
|
2017-10-04 18:59:40 +02:00
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
|
|
|
}
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_logic<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "xor<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_logic<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "one_cmpl<mode>2"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"not\t%0.<Vbtype>, %1.<Vbtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_logic<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_vec_set<mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
|
|
|
|
(vec_merge:VDQ_BHSI
|
|
|
|
(vec_duplicate:VDQ_BHSI
|
[AArch64] Extend aarch64_simd_vec_set pattern, replace asm for vld1_lane
gcc/:
* config/aarch64/aarch64-simd.md (aarch64_simd_vec_set<mode>): Add
variant reading from memory and assembling to ld1.
* config/aarch64/arm_neon.h (vld1_lane_f32, vld1_lane_f64, vld1_lane_p8,
vld1_lane_p16, vld1_lane_s8, vld1_lane_s16, vld1_lane_s32,
vld1_lane_s64, vld1_lane_u8, vld1_lane_u16, vld1_lane_u32,
vld1_lane_u64, vld1q_lane_f32, vld1q_lane_f64, vld1q_lane_p8,
vld1q_lane_p16, vld1q_lane_s8, vld1q_lane_s16, vld1q_lane_s32,
vld1q_lane_s64, vld1q_lane_u8, vld1q_lane_u16, vld1q_lane_u32,
vld1q_lane_u64): Replace asm with vset_lane and pointer dereference.
gcc/testsuite/:
* gcc.target/aarch64/vld1_lane.c: New test.
From-SVN: r217665
2014-11-17 19:29:49 +01:00
|
|
|
(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
|
[AArch64] Extend aarch64_simd_vec_set pattern, replace asm for vld1_lane
gcc/:
* config/aarch64/aarch64-simd.md (aarch64_simd_vec_set<mode>): Add
variant reading from memory and assembling to ld1.
* config/aarch64/arm_neon.h (vld1_lane_f32, vld1_lane_f64, vld1_lane_p8,
vld1_lane_p16, vld1_lane_s8, vld1_lane_s16, vld1_lane_s32,
vld1_lane_s64, vld1_lane_u8, vld1_lane_u16, vld1_lane_u32,
vld1_lane_u64, vld1q_lane_f32, vld1q_lane_f64, vld1q_lane_p8,
vld1q_lane_p16, vld1q_lane_s8, vld1q_lane_s16, vld1q_lane_s32,
vld1q_lane_s64, vld1q_lane_u8, vld1q_lane_u16, vld1q_lane_u32,
vld1q_lane_u64): Replace asm with vset_lane and pointer dereference.
gcc/testsuite/:
* gcc.target/aarch64/vld1_lane.c: New test.
From-SVN: r217665
2014-11-17 19:29:49 +01:00
|
|
|
(match_operand:SI 2 "immediate_operand" "i,i,i")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2013-11-22 16:29:19 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
|
2013-11-22 16:29:19 +01:00
|
|
|
operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
|
|
|
|
switch (which_alternative)
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
return "ins\\t%0.<Vetype>[%p2], %w1";
|
|
|
|
case 1:
|
|
|
|
return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
|
[AArch64] Extend aarch64_simd_vec_set pattern, replace asm for vld1_lane
gcc/:
* config/aarch64/aarch64-simd.md (aarch64_simd_vec_set<mode>): Add
variant reading from memory and assembling to ld1.
* config/aarch64/arm_neon.h (vld1_lane_f32, vld1_lane_f64, vld1_lane_p8,
vld1_lane_p16, vld1_lane_s8, vld1_lane_s16, vld1_lane_s32,
vld1_lane_s64, vld1_lane_u8, vld1_lane_u16, vld1_lane_u32,
vld1_lane_u64, vld1q_lane_f32, vld1q_lane_f64, vld1q_lane_p8,
vld1q_lane_p16, vld1q_lane_s8, vld1q_lane_s16, vld1q_lane_s32,
vld1q_lane_s64, vld1q_lane_u8, vld1q_lane_u16, vld1q_lane_u32,
vld1q_lane_u64): Replace asm with vset_lane and pointer dereference.
gcc/testsuite/:
* gcc.target/aarch64/vld1_lane.c: New test.
From-SVN: r217665
2014-11-17 19:29:49 +01:00
|
|
|
case 2:
|
|
|
|
return "ld1\\t{%0.<Vetype>}[%p2], %1";
|
2013-11-22 16:29:19 +01:00
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
|
|
|
}
|
|
|
|
}
|
2017-04-25 08:45:49 +02:00
|
|
|
[(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2016-06-30 17:15:26 +02:00
|
|
|
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
|
2017-06-02 17:03:54 +02:00
|
|
|
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
|
|
|
(vec_merge:VALL_F16
|
|
|
|
(vec_duplicate:VALL_F16
|
2016-06-30 17:15:26 +02:00
|
|
|
(vec_select:<VEL>
|
2017-06-02 17:03:54 +02:00
|
|
|
(match_operand:VALL_F16 3 "register_operand" "w")
|
2016-06-30 17:15:26 +02:00
|
|
|
(parallel
|
|
|
|
[(match_operand:SI 4 "immediate_operand" "i")])))
|
2017-06-02 17:03:54 +02:00
|
|
|
(match_operand:VALL_F16 1 "register_operand" "0")
|
2016-06-30 17:15:26 +02:00
|
|
|
(match_operand:SI 2 "immediate_operand" "i")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2017-11-13 09:21:16 +01:00
|
|
|
int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
|
2016-06-30 17:15:26 +02:00
|
|
|
operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
|
2017-11-13 09:21:16 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
|
2016-06-30 17:15:26 +02:00
|
|
|
|
|
|
|
return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_ins<q>")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
|
2017-03-09 11:34:36 +01:00
|
|
|
[(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
|
|
|
|
(vec_merge:VALL_F16_NO_V2Q
|
|
|
|
(vec_duplicate:VALL_F16_NO_V2Q
|
2016-06-30 17:15:26 +02:00
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
|
|
|
|
(parallel
|
|
|
|
[(match_operand:SI 4 "immediate_operand" "i")])))
|
2017-03-09 11:34:36 +01:00
|
|
|
(match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
|
2016-06-30 17:15:26 +02:00
|
|
|
(match_operand:SI 2 "immediate_operand" "i")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2017-11-13 09:21:16 +01:00
|
|
|
int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
|
2016-06-30 17:15:26 +02:00
|
|
|
operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
|
2017-11-13 09:21:16 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
|
|
|
|
INTVAL (operands[4]));
|
2016-06-30 17:15:26 +02:00
|
|
|
|
|
|
|
return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_ins<q>")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "aarch64_simd_lshr<mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"ushr\t%0.<Vtype>, %1.<Vtype>, %2"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_imm<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_ashr<mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"sshr\t%0.<Vtype>, %1.<Vtype>, %2"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_imm<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_imm_shl<mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"shl\t%0.<Vtype>, %1.<Vtype>, %2"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_imm<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_reg_sshl<mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_reg<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w")]
|
2012-10-23 19:02:30 +02:00
|
|
|
UNSPEC_ASHIFT_UNSIGNED))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_reg<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_reg_shl<mode>_signed"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w")]
|
2012-10-23 19:02:30 +02:00
|
|
|
UNSPEC_ASHIFT_SIGNED))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_reg<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "ashl<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(match_operand:VDQ_I 0 "register_operand" "")
|
|
|
|
(match_operand:VDQ_I 1 "register_operand" "")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 2 "general_operand" "")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
|
|
|
|
int shift_amount;
|
|
|
|
|
|
|
|
if (CONST_INT_P (operands[2]))
|
|
|
|
{
|
|
|
|
shift_amount = INTVAL (operands[2]);
|
|
|
|
if (shift_amount >= 0 && shift_amount < bit_width)
|
|
|
|
{
|
|
|
|
rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
|
|
|
|
shift_amount);
|
|
|
|
emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
|
|
|
|
operands[1],
|
|
|
|
tmp));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
operands[2] = force_reg (SImode, operands[2]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (MEM_P (operands[2]))
|
|
|
|
{
|
|
|
|
operands[2] = force_reg (SImode, operands[2]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (REG_P (operands[2]))
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (<MODE>mode);
|
|
|
|
emit_insn (gen_aarch64_simd_dup<mode> (tmp,
|
|
|
|
convert_to_mode (<VEL>mode,
|
|
|
|
operands[2],
|
|
|
|
0)));
|
|
|
|
emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
|
|
|
|
tmp));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
FAIL;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "lshr<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(match_operand:VDQ_I 0 "register_operand" "")
|
|
|
|
(match_operand:VDQ_I 1 "register_operand" "")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 2 "general_operand" "")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
|
|
|
|
int shift_amount;
|
|
|
|
|
|
|
|
if (CONST_INT_P (operands[2]))
|
|
|
|
{
|
|
|
|
shift_amount = INTVAL (operands[2]);
|
|
|
|
if (shift_amount > 0 && shift_amount <= bit_width)
|
|
|
|
{
|
|
|
|
rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
|
|
|
|
shift_amount);
|
|
|
|
emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
|
|
|
|
operands[1],
|
|
|
|
tmp));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
operands[2] = force_reg (SImode, operands[2]);
|
|
|
|
}
|
|
|
|
else if (MEM_P (operands[2]))
|
|
|
|
{
|
|
|
|
operands[2] = force_reg (SImode, operands[2]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (REG_P (operands[2]))
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (SImode);
|
|
|
|
rtx tmp1 = gen_reg_rtx (<MODE>mode);
|
|
|
|
emit_insn (gen_negsi2 (tmp, operands[2]));
|
|
|
|
emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
|
|
|
|
convert_to_mode (<VEL>mode,
|
|
|
|
tmp, 0)));
|
|
|
|
emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
|
|
|
|
operands[1],
|
|
|
|
tmp1));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
FAIL;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "ashr<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(match_operand:VDQ_I 0 "register_operand" "")
|
|
|
|
(match_operand:VDQ_I 1 "register_operand" "")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 2 "general_operand" "")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
|
|
|
|
int shift_amount;
|
|
|
|
|
|
|
|
if (CONST_INT_P (operands[2]))
|
|
|
|
{
|
|
|
|
shift_amount = INTVAL (operands[2]);
|
|
|
|
if (shift_amount > 0 && shift_amount <= bit_width)
|
|
|
|
{
|
|
|
|
rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
|
|
|
|
shift_amount);
|
|
|
|
emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
|
|
|
|
operands[1],
|
|
|
|
tmp));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
operands[2] = force_reg (SImode, operands[2]);
|
|
|
|
}
|
|
|
|
else if (MEM_P (operands[2]))
|
|
|
|
{
|
|
|
|
operands[2] = force_reg (SImode, operands[2]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (REG_P (operands[2]))
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (SImode);
|
|
|
|
rtx tmp1 = gen_reg_rtx (<MODE>mode);
|
|
|
|
emit_insn (gen_negsi2 (tmp, operands[2]));
|
|
|
|
emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
|
|
|
|
convert_to_mode (<VEL>mode,
|
|
|
|
tmp, 0)));
|
|
|
|
emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
|
|
|
|
operands[1],
|
|
|
|
tmp1));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
FAIL;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "vashl<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(match_operand:VDQ_I 0 "register_operand" "")
|
|
|
|
(match_operand:VDQ_I 1 "register_operand" "")
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "")]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
|
|
|
|
operands[2]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
;; Using mode VDQ_BHSI as there is no V2DImode neg!
|
2012-10-23 19:02:30 +02:00
|
|
|
;; Negating individual lanes most certainly offsets the
|
|
|
|
;; gain from vectorization.
|
|
|
|
(define_expand "vashr<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(match_operand:VDQ_BHSI 0 "register_operand" "")
|
|
|
|
(match_operand:VDQ_BHSI 1 "register_operand" "")
|
|
|
|
(match_operand:VDQ_BHSI 2 "register_operand" "")]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
rtx neg = gen_reg_rtx (<MODE>mode);
|
|
|
|
emit (gen_neg<mode>2 (neg, operands[2]));
|
|
|
|
emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
|
|
|
|
neg));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2014-01-23 17:24:13 +01:00
|
|
|
;; DI vector shift
|
|
|
|
(define_expand "aarch64_ashr_simddi"
|
|
|
|
[(match_operand:DI 0 "register_operand" "=w")
|
|
|
|
(match_operand:DI 1 "register_operand" "w")
|
2014-02-06 10:27:57 +01:00
|
|
|
(match_operand:SI 2 "aarch64_shift_imm64_di" "")]
|
2014-01-23 17:24:13 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2014-12-18 16:00:51 +01:00
|
|
|
/* An arithmetic shift right by 64 fills the result with copies of the sign
|
|
|
|
bit, just like asr by 63 - however the standard pattern does not handle
|
|
|
|
a shift by 64. */
|
2014-01-23 17:24:13 +01:00
|
|
|
if (INTVAL (operands[2]) == 64)
|
2014-12-18 16:00:51 +01:00
|
|
|
operands[2] = GEN_INT (63);
|
|
|
|
emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
|
2014-01-23 17:24:13 +01:00
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_expand "vlshr<mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(match_operand:VDQ_BHSI 0 "register_operand" "")
|
|
|
|
(match_operand:VDQ_BHSI 1 "register_operand" "")
|
|
|
|
(match_operand:VDQ_BHSI 2 "register_operand" "")]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
rtx neg = gen_reg_rtx (<MODE>mode);
|
|
|
|
emit (gen_neg<mode>2 (neg, operands[2]));
|
|
|
|
emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
|
|
|
|
neg));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2014-03-24 13:05:38 +01:00
|
|
|
(define_expand "aarch64_lshr_simddi"
|
|
|
|
[(match_operand:DI 0 "register_operand" "=w")
|
|
|
|
(match_operand:DI 1 "register_operand" "w")
|
|
|
|
(match_operand:SI 2 "aarch64_shift_imm64_di" "")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (INTVAL (operands[2]) == 64)
|
2014-12-18 16:20:11 +01:00
|
|
|
emit_move_insn (operands[0], const0_rtx);
|
2014-03-24 13:05:38 +01:00
|
|
|
else
|
|
|
|
emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_expand "vec_set<mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(match_operand:VDQ_BHSI 0 "register_operand")
|
2013-11-13 16:04:44 +01:00
|
|
|
(match_operand:<VEL> 1 "register_operand")
|
|
|
|
(match_operand:SI 2 "immediate_operand")]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
|
|
|
|
emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
|
|
|
|
GEN_INT (elem), operands[0]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2014-11-24 16:23:28 +01:00
|
|
|
;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
|
|
|
|
(define_insn "vec_shr_<mode>"
|
|
|
|
[(set (match_operand:VD 0 "register_operand" "=w")
|
2015-04-30 17:52:24 +02:00
|
|
|
(unspec:VD [(match_operand:VD 1 "register_operand" "w")
|
|
|
|
(match_operand:SI 2 "immediate_operand" "i")]
|
|
|
|
UNSPEC_VEC_SHR))]
|
2014-11-24 16:23:28 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
2015-04-30 17:52:24 +02:00
|
|
|
return "shl %d0, %d1, %2";
|
2014-11-24 16:23:28 +01:00
|
|
|
else
|
|
|
|
return "ushr %d0, %d1, %2";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_shift_imm")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "aarch64_simd_vec_setv2di"
|
2013-11-13 16:04:44 +01:00
|
|
|
[(set (match_operand:V2DI 0 "register_operand" "=w,w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(vec_merge:V2DI
|
|
|
|
(vec_duplicate:V2DI
|
2013-11-13 16:04:44 +01:00
|
|
|
(match_operand:DI 1 "register_operand" "r,w"))
|
|
|
|
(match_operand:V2DI 3 "register_operand" "0,0")
|
|
|
|
(match_operand:SI 2 "immediate_operand" "i,i")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2013-11-22 16:29:19 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
|
2013-11-22 16:29:19 +01:00
|
|
|
operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
|
|
|
|
switch (which_alternative)
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
return "ins\\t%0.d[%p2], %1";
|
|
|
|
case 1:
|
|
|
|
return "ins\\t%0.d[%p2], %1.d[0]";
|
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
|
|
|
}
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_from_gp, neon_ins_q")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "vec_setv2di"
|
2013-11-13 16:04:44 +01:00
|
|
|
[(match_operand:V2DI 0 "register_operand")
|
|
|
|
(match_operand:DI 1 "register_operand")
|
|
|
|
(match_operand:SI 2 "immediate_operand")]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
|
|
|
|
emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
|
|
|
|
GEN_INT (elem), operands[0]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_vec_set<mode>"
|
2015-09-08 21:18:29 +02:00
|
|
|
[(set (match_operand:VDQF_F16 0 "register_operand" "=w")
|
|
|
|
(vec_merge:VDQF_F16
|
|
|
|
(vec_duplicate:VDQF_F16
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:<VEL> 1 "register_operand" "w"))
|
2015-09-08 21:18:29 +02:00
|
|
|
(match_operand:VDQF_F16 3 "register_operand" "0")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 2 "immediate_operand" "i")))]
|
|
|
|
"TARGET_SIMD"
|
2013-11-22 16:29:19 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
|
2013-11-22 16:29:19 +01:00
|
|
|
|
|
|
|
operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
|
|
|
|
return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_ins<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "vec_set<mode>"
|
2015-09-08 21:18:29 +02:00
|
|
|
[(match_operand:VDQF_F16 0 "register_operand" "+w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:<VEL> 1 "register_operand" "w")
|
|
|
|
(match_operand:SI 2 "immediate_operand" "")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
|
|
|
|
emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
|
|
|
|
GEN_INT (elem), operands[0]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
(define_insn "aarch64_mla<mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(plus:VDQ_BHSI (mult:VDQ_BHSI
|
|
|
|
(match_operand:VDQ_BHSI 2 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_BHSI 3 "register_operand" "w"))
|
|
|
|
(match_operand:VDQ_BHSI 1 "register_operand" "0")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2013-09-16 11:53:11 +02:00
|
|
|
(define_insn "*aarch64_mla_elt<mode>"
|
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(plus:VDQHS
|
|
|
|
(mult:VDQHS
|
|
|
|
(vec_duplicate:VDQHS
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:VDQHS 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VDQHS 3 "register_operand" "w"))
|
|
|
|
(match_operand:VDQHS 4 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
|
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(plus:VDQHS
|
|
|
|
(mult:VDQHS
|
|
|
|
(vec_duplicate:VDQHS
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VDQHS 3 "register_operand" "w"))
|
|
|
|
(match_operand:VDQHS 4 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
2017-07-24 13:37:09 +02:00
|
|
|
(define_insn "*aarch64_mla_elt_merge<mode>"
|
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(plus:VDQHS
|
|
|
|
(mult:VDQHS (vec_duplicate:VDQHS
|
2017-08-31 18:03:09 +02:00
|
|
|
(match_operand:<VEL> 1 "register_operand" "<h_con>"))
|
2017-07-24 13:37:09 +02:00
|
|
|
(match_operand:VDQHS 2 "register_operand" "w"))
|
|
|
|
(match_operand:VDQHS 3 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
|
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "aarch64_mls<mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
|
|
|
|
(mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2013-09-16 11:53:11 +02:00
|
|
|
(define_insn "*aarch64_mls_elt<mode>"
|
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(minus:VDQHS
|
|
|
|
(match_operand:VDQHS 4 "register_operand" "0")
|
|
|
|
(mult:VDQHS
|
|
|
|
(vec_duplicate:VDQHS
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:VDQHS 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VDQHS 3 "register_operand" "w"))))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
|
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(minus:VDQHS
|
|
|
|
(match_operand:VDQHS 4 "register_operand" "0")
|
|
|
|
(mult:VDQHS
|
|
|
|
(vec_duplicate:VDQHS
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VDQHS 3 "register_operand" "w"))))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
2017-07-24 13:37:09 +02:00
|
|
|
(define_insn "*aarch64_mls_elt_merge<mode>"
|
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(minus:VDQHS
|
|
|
|
(match_operand:VDQHS 1 "register_operand" "0")
|
|
|
|
(mult:VDQHS (vec_duplicate:VDQHS
|
2017-08-31 18:03:09 +02:00
|
|
|
(match_operand:<VEL> 2 "register_operand" "<h_con>"))
|
2017-07-24 13:37:09 +02:00
|
|
|
(match_operand:VDQHS 3 "register_operand" "w"))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
|
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
;; Max/Min operations.
|
2013-05-01 17:16:14 +02:00
|
|
|
(define_insn "<su><maxmin><mode>3"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_BHSI 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2013-05-01 17:16:14 +02:00
|
|
|
"<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_minmax<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2014-11-19 17:34:38 +01:00
|
|
|
(define_expand "<su><maxmin>v2di3"
|
|
|
|
[(set (match_operand:V2DI 0 "register_operand" "")
|
|
|
|
(MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
|
|
|
|
(match_operand:V2DI 2 "register_operand" "")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
enum rtx_code cmp_operator;
|
|
|
|
rtx cmp_fmt;
|
|
|
|
|
|
|
|
switch (<CODE>)
|
|
|
|
{
|
|
|
|
case UMIN:
|
|
|
|
cmp_operator = LTU;
|
|
|
|
break;
|
|
|
|
case SMIN:
|
|
|
|
cmp_operator = LT;
|
|
|
|
break;
|
|
|
|
case UMAX:
|
|
|
|
cmp_operator = GTU;
|
|
|
|
break;
|
|
|
|
case SMAX:
|
|
|
|
cmp_operator = GT;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
|
|
|
}
|
|
|
|
|
|
|
|
cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
|
2016-08-10 17:34:23 +02:00
|
|
|
emit_insn (gen_vcondv2div2di (operands[0], operands[1],
|
2014-11-19 17:34:38 +01:00
|
|
|
operands[2], cmp_fmt, operands[1], operands[2]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
aarch64-simd.md (aarch64_<maxmin_uns>p<mode>): New pattern.
* config/aarch64/aarch64-simd.md (aarch64_<maxmin_uns>p<mode>): New
pattern.
* config/aarch64/aarch64-simd-builtins.def (smaxp, sminp, umaxp,
uminp, smax_nanp, smin_nanp): New builtins.
* config/aarch64/arm_neon.h (vpmax_s8, vpmax_s16, vpmax_s32,
vpmax_u8, vpmax_u16, vpmax_u32, vpmaxq_s8, vpmaxq_s16, vpmaxq_s32,
vpmaxq_u8, vpmaxq_u16, vpmaxq_u32, vpmax_f32, vpmaxq_f32, vpmaxq_f64,
vpmaxqd_f64, vpmaxs_f32, vpmaxnm_f32, vpmaxnmq_f32, vpmaxnmq_f64,
vpmaxnmqd_f64, vpmaxnms_f32, vpmin_s8, vpmin_s16, vpmin_s32, vpmin_u8,
vpmin_u16, vpmin_u32, vpminq_s8, vpminq_s16, vpminq_s32, vpminq_u8,
vpminq_u16, vpminq_u32, vpmin_f32, vpminq_f32, vpminq_f64, vpminqd_f64,
vpmins_f32, vpminnm_f32, vpminnmq_f32, vpminnmq_f64, vpminnmqd_f64,
vpminnms_f32): Rewrite using builtin functions.
From-SVN: r219840
2015-01-19 14:22:41 +01:00
|
|
|
;; Pairwise Integer Max/Min operations.
|
|
|
|
(define_insn "aarch64_<maxmin_uns>p<mode>"
|
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_BHSI 2 "register_operand" "w")]
|
|
|
|
MAXMINV))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
|
|
|
[(set_attr "type" "neon_minmax<q>")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; Pairwise FP Max/Min operations.
|
|
|
|
(define_insn "aarch64_<maxmin_uns>p<mode>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")]
|
|
|
|
FMAXMINV))]
|
aarch64-simd.md (aarch64_<maxmin_uns>p<mode>): New pattern.
* config/aarch64/aarch64-simd.md (aarch64_<maxmin_uns>p<mode>): New
pattern.
* config/aarch64/aarch64-simd-builtins.def (smaxp, sminp, umaxp,
uminp, smax_nanp, smin_nanp): New builtins.
* config/aarch64/arm_neon.h (vpmax_s8, vpmax_s16, vpmax_s32,
vpmax_u8, vpmax_u16, vpmax_u32, vpmaxq_s8, vpmaxq_s16, vpmaxq_s32,
vpmaxq_u8, vpmaxq_u16, vpmaxq_u32, vpmax_f32, vpmaxq_f32, vpmaxq_f64,
vpmaxqd_f64, vpmaxs_f32, vpmaxnm_f32, vpmaxnmq_f32, vpmaxnmq_f64,
vpmaxnmqd_f64, vpmaxnms_f32, vpmin_s8, vpmin_s16, vpmin_s32, vpmin_u8,
vpmin_u16, vpmin_u32, vpminq_s8, vpminq_s16, vpminq_s32, vpminq_u8,
vpminq_u16, vpminq_u32, vpmin_f32, vpminq_f32, vpminq_f64, vpminqd_f64,
vpmins_f32, vpminnm_f32, vpminnmq_f32, vpminnmq_f64, vpminnmqd_f64,
vpminnms_f32): Rewrite using builtin functions.
From-SVN: r219840
2015-01-19 14:22:41 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
|
|
|
[(set_attr "type" "neon_minmax<q>")]
|
|
|
|
)
|
|
|
|
|
2014-07-04 17:56:27 +02:00
|
|
|
;; vec_concat gives a new vector with the low elements from operand 1, and
|
|
|
|
;; the high elements from operand 2. That is to say, given op1 = { a, b }
|
|
|
|
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
|
|
|
|
;; What that means, is that the RTL descriptions of the below patterns
|
|
|
|
;; need to change depending on endianness.
|
2012-10-23 19:02:30 +02:00
|
|
|
|
2014-07-04 17:56:27 +02:00
|
|
|
;; Move to the low architectural bits of the register.
|
|
|
|
;; On little-endian this is { operand, zeroes }
|
|
|
|
;; On big-endian this is { zeroes, operand }
|
|
|
|
|
|
|
|
(define_insn "move_lo_quad_internal_<mode>"
|
2014-09-04 18:06:13 +02:00
|
|
|
[(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
|
|
|
|
(vec_concat:VQ_NO2E
|
2013-05-13 16:22:02 +02:00
|
|
|
(match_operand:<VHALF> 1 "register_operand" "w,r,r")
|
2012-10-23 19:02:30 +02:00
|
|
|
(vec_duplicate:<VHALF> (const_int 0))))]
|
2014-07-04 17:56:27 +02:00
|
|
|
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
|
|
|
|
"@
|
|
|
|
dup\\t%d0, %1.d[0]
|
|
|
|
fmov\\t%d0, %1
|
|
|
|
dup\\t%d0, %1"
|
|
|
|
[(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
|
|
|
|
(set_attr "simd" "yes,*,yes")
|
|
|
|
(set_attr "fp" "*,yes,*")
|
|
|
|
(set_attr "length" "4")]
|
|
|
|
)
|
|
|
|
|
2014-09-04 18:06:13 +02:00
|
|
|
(define_insn "move_lo_quad_internal_<mode>"
|
|
|
|
[(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
|
|
|
|
(vec_concat:VQ_2E
|
|
|
|
(match_operand:<VHALF> 1 "register_operand" "w,r,r")
|
|
|
|
(const_int 0)))]
|
|
|
|
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
|
|
|
|
"@
|
|
|
|
dup\\t%d0, %1.d[0]
|
|
|
|
fmov\\t%d0, %1
|
|
|
|
dup\\t%d0, %1"
|
|
|
|
[(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
|
|
|
|
(set_attr "simd" "yes,*,yes")
|
|
|
|
(set_attr "fp" "*,yes,*")
|
|
|
|
(set_attr "length" "4")]
|
|
|
|
)
|
|
|
|
|
2014-07-04 17:56:27 +02:00
|
|
|
(define_insn "move_lo_quad_internal_be_<mode>"
|
2014-09-04 18:06:13 +02:00
|
|
|
[(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
|
|
|
|
(vec_concat:VQ_NO2E
|
2014-07-04 17:56:27 +02:00
|
|
|
(vec_duplicate:<VHALF> (const_int 0))
|
|
|
|
(match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
|
|
|
|
"TARGET_SIMD && BYTES_BIG_ENDIAN"
|
2013-05-13 16:22:02 +02:00
|
|
|
"@
|
2013-06-11 17:02:47 +02:00
|
|
|
dup\\t%d0, %1.d[0]
|
|
|
|
fmov\\t%d0, %1
|
|
|
|
dup\\t%d0, %1"
|
2014-06-10 18:05:17 +02:00
|
|
|
[(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
|
2013-05-13 16:22:02 +02:00
|
|
|
(set_attr "simd" "yes,*,yes")
|
|
|
|
(set_attr "fp" "*,yes,*")
|
|
|
|
(set_attr "length" "4")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2014-09-04 18:06:13 +02:00
|
|
|
(define_insn "move_lo_quad_internal_be_<mode>"
|
|
|
|
[(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
|
|
|
|
(vec_concat:VQ_2E
|
|
|
|
(const_int 0)
|
|
|
|
(match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
|
|
|
|
"TARGET_SIMD && BYTES_BIG_ENDIAN"
|
|
|
|
"@
|
|
|
|
dup\\t%d0, %1.d[0]
|
|
|
|
fmov\\t%d0, %1
|
|
|
|
dup\\t%d0, %1"
|
|
|
|
[(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
|
|
|
|
(set_attr "simd" "yes,*,yes")
|
|
|
|
(set_attr "fp" "*,yes,*")
|
|
|
|
(set_attr "length" "4")]
|
|
|
|
)
|
|
|
|
|
2014-07-04 17:56:27 +02:00
|
|
|
(define_expand "move_lo_quad_<mode>"
|
|
|
|
[(match_operand:VQ 0 "register_operand")
|
|
|
|
(match_operand:VQ 1 "register_operand")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
|
|
|
|
else
|
|
|
|
emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
;; Move operand1 to the high architectural bits of the register, keeping
|
|
|
|
;; the low architectural bits of operand2.
|
|
|
|
;; For little-endian this is { operand2, operand1 }
|
|
|
|
;; For big-endian this is { operand1, operand2 }
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
(define_insn "aarch64_simd_move_hi_quad_<mode>"
|
2013-05-13 16:22:02 +02:00
|
|
|
[(set (match_operand:VQ 0 "register_operand" "+w,w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(vec_concat:VQ
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_dup 0)
|
|
|
|
(match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
|
2013-05-13 16:22:02 +02:00
|
|
|
(match_operand:<VHALF> 1 "register_operand" "w,r")))]
|
2014-07-04 17:56:27 +02:00
|
|
|
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
|
2013-05-13 16:22:02 +02:00
|
|
|
"@
|
|
|
|
ins\\t%0.d[1], %1.d[0]
|
|
|
|
ins\\t%0.d[1], %1"
|
2014-07-04 17:56:27 +02:00
|
|
|
[(set_attr "type" "neon_ins")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
|
|
|
|
[(set (match_operand:VQ 0 "register_operand" "+w,w")
|
|
|
|
(vec_concat:VQ
|
|
|
|
(match_operand:<VHALF> 1 "register_operand" "w,r")
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_dup 0)
|
2014-07-31 17:31:24 +02:00
|
|
|
(match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
|
2014-07-04 17:56:27 +02:00
|
|
|
"TARGET_SIMD && BYTES_BIG_ENDIAN"
|
|
|
|
"@
|
|
|
|
ins\\t%0.d[1], %1.d[0]
|
|
|
|
ins\\t%0.d[1], %1"
|
|
|
|
[(set_attr "type" "neon_ins")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "move_hi_quad_<mode>"
|
|
|
|
[(match_operand:VQ 0 "register_operand" "")
|
|
|
|
(match_operand:<VHALF> 1 "register_operand" "")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
|
2014-07-04 17:56:27 +02:00
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
|
|
|
|
operands[1], p));
|
|
|
|
else
|
|
|
|
emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
|
|
|
|
operands[1], p));
|
2012-10-23 19:02:30 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; Narrowing operations.
|
|
|
|
|
|
|
|
;; For doubles.
|
|
|
|
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
|
|
|
|
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
|
|
|
(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"xtn\\t%0.<Vntype>, %1.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_imm_narrow_q")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "vec_pack_trunc_<mode>"
|
|
|
|
[(match_operand:<VNARROWD> 0 "register_operand" "")
|
|
|
|
(match_operand:VDN 1 "register_operand" "")
|
|
|
|
(match_operand:VDN 2 "register_operand" "")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
rtx tempreg = gen_reg_rtx (<VDBL>mode);
|
2013-11-22 16:31:57 +01:00
|
|
|
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
|
|
|
|
int hi = BYTES_BIG_ENDIAN ? 1 : 2;
|
2012-10-23 19:02:30 +02:00
|
|
|
|
2013-11-22 16:31:57 +01:00
|
|
|
emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
|
|
|
|
emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; For quads.
|
|
|
|
|
|
|
|
(define_insn "vec_pack_trunc_<mode>"
|
2014-07-04 15:36:12 +02:00
|
|
|
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(vec_concat:<VNARROWQ2>
|
|
|
|
(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
|
|
|
|
(truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
|
|
|
|
"TARGET_SIMD"
|
2013-11-22 16:31:57 +01:00
|
|
|
{
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
|
|
|
|
else
|
|
|
|
return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "multiple")
|
2012-10-23 19:02:30 +02:00
|
|
|
(set_attr "length" "8")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; Widening operations.
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "vect_par_cnst_lo_half" "")
|
|
|
|
)))]
|
|
|
|
"TARGET_SIMD"
|
2014-11-21 13:29:26 +01:00
|
|
|
"<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_imm_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "vect_par_cnst_hi_half" "")
|
|
|
|
)))]
|
|
|
|
"TARGET_SIMD"
|
2014-11-21 13:29:26 +01:00
|
|
|
"<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_imm_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "vec_unpack<su>_hi_<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "")
|
|
|
|
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
|
|
|
|
operands[1], p));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "vec_unpack<su>_lo_<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "")
|
|
|
|
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
|
|
|
|
operands[1], p));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
;; Widening arithmetic.
|
|
|
|
|
2013-06-14 18:53:10 +02:00
|
|
|
(define_insn "*aarch64_<su>mlal_lo<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(plus:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 4 "register_operand" "w")
|
|
|
|
(match_dup 3))))
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_long")]
|
2013-06-14 18:53:10 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_<su>mlal_hi<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(plus:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 4 "register_operand" "w")
|
|
|
|
(match_dup 3))))
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_long")]
|
2013-06-14 18:53:10 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_<su>mlsl_lo<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(minus:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 4 "register_operand" "w")
|
|
|
|
(match_dup 3))))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_long")]
|
2013-06-14 18:53:10 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_<su>mlsl_hi<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(minus:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 4 "register_operand" "w")
|
|
|
|
(match_dup 3))))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_long")]
|
2013-06-14 18:53:10 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_<su>mlal<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(plus:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(ANY_EXTEND:<VWIDE>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VD_BHSI 1 "register_operand" "w"))
|
2013-06-14 18:53:10 +02:00
|
|
|
(ANY_EXTEND:<VWIDE>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VD_BHSI 2 "register_operand" "w")))
|
2013-06-14 18:53:10 +02:00
|
|
|
(match_operand:<VWIDE> 3 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_long")]
|
2013-06-14 18:53:10 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_<su>mlsl<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(minus:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(ANY_EXTEND:<VWIDE>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VD_BHSI 2 "register_operand" "w"))
|
2013-06-14 18:53:10 +02:00
|
|
|
(ANY_EXTEND:<VWIDE>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VD_BHSI 3 "register_operand" "w")))))]
|
2013-06-14 18:53:10 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mla_<Vetype>_long")]
|
2013-06-14 18:53:10 +02:00
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_dup 3)))))]
|
|
|
|
"TARGET_SIMD"
|
2013-01-08 17:21:51 +01:00
|
|
|
"<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mul_<Vetype>_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "vec_widen_<su>mult_lo_<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "")
|
|
|
|
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
|
|
|
|
(ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
|
|
|
|
operands[1],
|
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_dup 3)))))]
|
|
|
|
"TARGET_SIMD"
|
2013-01-08 17:21:51 +01:00
|
|
|
"<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mul_<Vetype>_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "vec_widen_<su>mult_hi_<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "")
|
|
|
|
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
|
|
|
|
(ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
|
|
|
|
operands[1],
|
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
;; FP vector operations.
|
|
|
|
;; AArch64 AdvSIMD supports single-precision (32-bit) and
|
|
|
|
;; double-precision (64-bit) floating-point data types and arithmetic as
|
|
|
|
;; defined by the IEEE 754-2008 standard. This makes them vectorizable
|
|
|
|
;; without the need for -ffast-math or -funsafe-math-optimizations.
|
|
|
|
;;
|
|
|
|
;; Floating-point operations can raise an exception. Vectorizing such
|
|
|
|
;; operations are safe because of reasons explained below.
|
|
|
|
;;
|
|
|
|
;; ARMv8 permits an extension to enable trapped floating-point
|
|
|
|
;; exception handling, however this is an optional feature. In the
|
|
|
|
;; event of a floating-point exception being raised by vectorised
|
|
|
|
;; code then:
|
|
|
|
;; 1. If trapped floating-point exceptions are available, then a trap
|
|
|
|
;; will be taken when any lane raises an enabled exception. A trap
|
|
|
|
;; handler may determine which lane raised the exception.
|
|
|
|
;; 2. Alternatively a sticky exception flag is set in the
|
|
|
|
;; floating-point status register (FPSR). Software may explicitly
|
|
|
|
;; test the exception flags, in which case the tests will either
|
|
|
|
;; prevent vectorisation, allowing precise identification of the
|
|
|
|
;; failing operation, or if tested outside of vectorisable regions
|
|
|
|
;; then the specific operation and lane are not of interest.
|
|
|
|
|
|
|
|
;; FP arithmetic operations.
|
|
|
|
|
|
|
|
(define_insn "add<mode>3"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_addsub_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "sub<mode>3"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_addsub_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "mul<mode>3"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_mul_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2016-06-13 21:03:00 +02:00
|
|
|
(define_expand "div<mode>3"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
2016-06-13 21:03:00 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
|
|
|
|
DONE;
|
|
|
|
|
|
|
|
operands[1] = force_reg (<MODE>mode, operands[1]);
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "*div<mode>3"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_div_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "neg<mode>2"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"fneg\\t%0.<Vtype>, %1.<Vtype>"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set_attr "type" "neon_fp_neg_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "abs<mode>2"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"fabs\\t%0.<Vtype>, %1.<Vtype>"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set_attr "type" "neon_fp_abs_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "fma<mode>4"
|
2016-07-25 16:44:24 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 3 "register_operand" "0")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2016-07-25 16:44:24 +02:00
|
|
|
[(set_attr "type" "neon_fp_mla_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2013-09-16 11:53:11 +02:00
|
|
|
(define_insn "*aarch64_fma4_elt<mode>"
|
|
|
|
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
|
|
|
(fma:VDQF
|
|
|
|
(vec_duplicate:VDQF
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:VDQF 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VDQF 3 "register_operand" "w")
|
|
|
|
(match_operand:VDQF 4 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
|
|
|
|
[(set (match_operand:VDQSF 0 "register_operand" "=w")
|
|
|
|
(fma:VDQSF
|
|
|
|
(vec_duplicate:VDQSF
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VDQSF 3 "register_operand" "w")
|
|
|
|
(match_operand:VDQSF 4 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
2016-05-17 18:34:46 +02:00
|
|
|
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
|
|
|
|
[(set (match_operand:VMUL 0 "register_operand" "=w")
|
|
|
|
(fma:VMUL
|
|
|
|
(vec_duplicate:VMUL
|
2017-03-16 11:03:11 +01:00
|
|
|
(match_operand:<VEL> 1 "register_operand" "<h_con>"))
|
2016-05-17 18:34:46 +02:00
|
|
|
(match_operand:VMUL 2 "register_operand" "w")
|
|
|
|
(match_operand:VMUL 3 "register_operand" "0")))]
|
2013-09-16 11:53:11 +02:00
|
|
|
"TARGET_SIMD"
|
2016-05-17 18:34:46 +02:00
|
|
|
"fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
|
[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
gcc/
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
(*aarch64_mul3_elt_from_dup<mode>): Likewise.
(*aarch64_fma4_elt_from_dup<mode>): Likewise.
(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
* config/aarch64/iterators.md (VMUL): Supprt half precision float modes.
(f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
From-SVN: r238719
2016-07-25 16:49:57 +02:00
|
|
|
[(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_fma4_elt_to_64v2df"
|
|
|
|
[(set (match_operand:DF 0 "register_operand" "=w")
|
|
|
|
(fma:DF
|
|
|
|
(vec_select:DF
|
|
|
|
(match_operand:V2DF 1 "register_operand" "w")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")]))
|
|
|
|
(match_operand:DF 3 "register_operand" "w")
|
|
|
|
(match_operand:DF 4 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "fnma<mode>4"
|
2016-07-25 16:44:24 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(fma:VHSDF
|
2017-11-17 23:55:38 +01:00
|
|
|
(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")
|
2016-07-25 16:44:24 +02:00
|
|
|
(match_operand:VHSDF 3 "register_operand" "0")))]
|
2013-09-16 11:53:11 +02:00
|
|
|
"TARGET_SIMD"
|
2016-07-25 16:44:24 +02:00
|
|
|
"fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
|
|
|
[(set_attr "type" "neon_fp_mla_<stype><q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_fnma4_elt<mode>"
|
|
|
|
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
|
|
|
(fma:VDQF
|
|
|
|
(neg:VDQF
|
|
|
|
(match_operand:VDQF 3 "register_operand" "w"))
|
|
|
|
(vec_duplicate:VDQF
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:VDQF 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VDQF 4 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
|
|
|
|
[(set (match_operand:VDQSF 0 "register_operand" "=w")
|
|
|
|
(fma:VDQSF
|
|
|
|
(neg:VDQSF
|
|
|
|
(match_operand:VDQSF 3 "register_operand" "w"))
|
|
|
|
(vec_duplicate:VDQSF
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")])))
|
|
|
|
(match_operand:VDQSF 4 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
2016-05-17 18:34:46 +02:00
|
|
|
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
|
|
|
|
[(set (match_operand:VMUL 0 "register_operand" "=w")
|
|
|
|
(fma:VMUL
|
|
|
|
(neg:VMUL
|
|
|
|
(match_operand:VMUL 2 "register_operand" "w"))
|
|
|
|
(vec_duplicate:VMUL
|
2017-03-16 11:03:11 +01:00
|
|
|
(match_operand:<VEL> 1 "register_operand" "<h_con>"))
|
2016-05-17 18:34:46 +02:00
|
|
|
(match_operand:VMUL 3 "register_operand" "0")))]
|
2013-09-16 11:53:11 +02:00
|
|
|
"TARGET_SIMD"
|
2016-05-17 18:34:46 +02:00
|
|
|
"fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
|
[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
gcc/
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
(*aarch64_mul3_elt_from_dup<mode>): Likewise.
(*aarch64_fma4_elt_from_dup<mode>): Likewise.
(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
* config/aarch64/iterators.md (VMUL): Supprt half precision float modes.
(f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
From-SVN: r238719
2016-07-25 16:49:57 +02:00
|
|
|
[(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_fnma4_elt_to_64v2df"
|
|
|
|
[(set (match_operand:DF 0 "register_operand" "=w")
|
|
|
|
(fma:DF
|
|
|
|
(vec_select:DF
|
|
|
|
(match_operand:V2DF 1 "register_operand" "w")
|
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand")]))
|
|
|
|
(neg:DF
|
|
|
|
(match_operand:DF 3 "register_operand" "w"))
|
|
|
|
(match_operand:DF 4 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
|
2013-09-16 11:53:11 +02:00
|
|
|
)
|
|
|
|
|
2013-04-29 12:17:51 +02:00
|
|
|
;; Vector versions of the floating-point frint patterns.
|
2014-04-22 10:39:48 +02:00
|
|
|
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
|
2013-04-29 12:17:51 +02:00
|
|
|
(define_insn "<frint_pattern><mode>2"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
|
|
|
|
FRINT))]
|
[AARCH64] Add support for vectorizable standard math patterns.
gcc/
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): New.
* config/aarch64/aarch64-protos.h
(aarch64_builtin_vectorized_function): Declare.
* config/aarch64/aarch64-simd-builtins.def (frintz, frintp): Add.
(frintm, frinti, frintx, frinta, fcvtzs, fcvtzu): Likewise.
(fcvtas, fcvtau, fcvtps, fcvtpu, fcvtms, fcvtmu): Likewise.
* config/aarch64/aarch64-simd.md
(aarch64_frint_<frint_suffix><mode>): New.
(<frint_pattern><mode>2): Likewise.
(aarch64_fcvt<frint_suffix><su><mode>): Likewise.
(l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2): Likewise.
* config/aarch64/aarch64.c (TARGET_VECTORIZE_BUILTINS): Define.
(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise.
* config/aarch64/aarch64.md
(btrunc<mode>2, ceil<mode>2, floor<mode>2)
(round<mode>2, rint<mode>2, nearbyint<mode>2): Consolidate as...
(<frint_pattern><mode>2): ...this.
(lceil<su_optab><mode><mode>2, lfloor<su_optab><mode><mode>2)
(lround<su_optab><mode><mode>2)
(lrint<su_optab><mode><mode>2): Consolidate as...
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): ... this.
* config/aarch64/iterators.md (fcvt_target): New.
(FCVT_TARGET): Likewise.
(FRINT): Likewise.
(FCVT): Likewise.
(frint_pattern): Likewise.
(frint_suffix): Likewise.
(fcvt_pattern): Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-rounding-btrunc.c: New test.
* gcc.dg/vect/vect-rounding-btruncf.c: Likewise.
* gcc.dg/vect/vect-rounding-ceil.c: Likewise.
* gcc.dg/vect/vect-rounding-ceilf.c: Likewise.
* gcc.dg/vect/vect-rounding-floor.c: Likewise.
* gcc.dg/vect/vect-rounding-floorf.c: Likewise.
* gcc.dg/vect/vect-rounding-lceil.c: Likewise.
* gcc.dg/vect/vect-rounding-lfloor.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyint.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyintf.c: Likewise.
* gcc.dg/vect/vect-rounding-round.c: Likewise.
* gcc.dg/vect/vect-rounding-roundf.c: Likewise.
* target-supports.exp
(check_effective_target_vect_call_btrunc): New.
(check_effective_target_vect_call_btruncf): Likewise.
(check_effective_target_vect_call_ceil): Likewise.
(check_effective_target_vect_call_ceilf): Likewise.
(check_effective_target_vect_call_floor): Likewise.
(check_effective_target_vect_call_floorf): Likewise.
(check_effective_target_vect_call_lceil): Likewise.
(check_effective_target_vect_call_lfloor): Likewise.
(check_effective_target_vect_call_nearbyint): Likewise.
(check_effective_target_vect_call_nearbyintf): Likewise.
(check_effective_target_vect_call_round): Likewise.
(check_effective_target_vect_call_roundf): Likewise.
From-SVN: r194197
2012-12-05 11:34:31 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set_attr "type" "neon_fp_round_<stype><q>")]
|
[AARCH64] Add support for vectorizable standard math patterns.
gcc/
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): New.
* config/aarch64/aarch64-protos.h
(aarch64_builtin_vectorized_function): Declare.
* config/aarch64/aarch64-simd-builtins.def (frintz, frintp): Add.
(frintm, frinti, frintx, frinta, fcvtzs, fcvtzu): Likewise.
(fcvtas, fcvtau, fcvtps, fcvtpu, fcvtms, fcvtmu): Likewise.
* config/aarch64/aarch64-simd.md
(aarch64_frint_<frint_suffix><mode>): New.
(<frint_pattern><mode>2): Likewise.
(aarch64_fcvt<frint_suffix><su><mode>): Likewise.
(l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2): Likewise.
* config/aarch64/aarch64.c (TARGET_VECTORIZE_BUILTINS): Define.
(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise.
* config/aarch64/aarch64.md
(btrunc<mode>2, ceil<mode>2, floor<mode>2)
(round<mode>2, rint<mode>2, nearbyint<mode>2): Consolidate as...
(<frint_pattern><mode>2): ...this.
(lceil<su_optab><mode><mode>2, lfloor<su_optab><mode><mode>2)
(lround<su_optab><mode><mode>2)
(lrint<su_optab><mode><mode>2): Consolidate as...
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): ... this.
* config/aarch64/iterators.md (fcvt_target): New.
(FCVT_TARGET): Likewise.
(FRINT): Likewise.
(FCVT): Likewise.
(frint_pattern): Likewise.
(frint_suffix): Likewise.
(fcvt_pattern): Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-rounding-btrunc.c: New test.
* gcc.dg/vect/vect-rounding-btruncf.c: Likewise.
* gcc.dg/vect/vect-rounding-ceil.c: Likewise.
* gcc.dg/vect/vect-rounding-ceilf.c: Likewise.
* gcc.dg/vect/vect-rounding-floor.c: Likewise.
* gcc.dg/vect/vect-rounding-floorf.c: Likewise.
* gcc.dg/vect/vect-rounding-lceil.c: Likewise.
* gcc.dg/vect/vect-rounding-lfloor.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyint.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyintf.c: Likewise.
* gcc.dg/vect/vect-rounding-round.c: Likewise.
* gcc.dg/vect/vect-rounding-roundf.c: Likewise.
* target-supports.exp
(check_effective_target_vect_call_btrunc): New.
(check_effective_target_vect_call_btruncf): Likewise.
(check_effective_target_vect_call_ceil): Likewise.
(check_effective_target_vect_call_ceilf): Likewise.
(check_effective_target_vect_call_floor): Likewise.
(check_effective_target_vect_call_floorf): Likewise.
(check_effective_target_vect_call_lceil): Likewise.
(check_effective_target_vect_call_lfloor): Likewise.
(check_effective_target_vect_call_nearbyint): Likewise.
(check_effective_target_vect_call_nearbyintf): Likewise.
(check_effective_target_vect_call_round): Likewise.
(check_effective_target_vect_call_roundf): Likewise.
From-SVN: r194197
2012-12-05 11:34:31 +01:00
|
|
|
)
|
|
|
|
|
[AArch64] Map fcvt intrinsics to builtin name directly.
gcc/
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): Use new names for
fcvt builtins.
* config/aarch64/aarch64-simd-builtins.def (fcvtzs): Split as...
(lbtruncv2sf, lbtruncv4sf, lbtruncv2df): ...This.
(fcvtzu): Split as...
(lbtruncuv2sf, lbtruncuv4sf, lbtruncuv2df): ...This.
(fcvtas): Split as...
(lroundv2sf, lroundv4sf, lroundv2df, lroundsf, lrounddf): ...This.
(fcvtau): Split as...
(lrounduv2sf, lrounduv4sf, lrounduv2df, lroundusf, lroundudf): ...This.
(fcvtps): Split as...
(lceilv2sf, lceilv4sf, lceilv2df): ...This.
(fcvtpu): Split as...
(lceiluv2sf, lceiluv4sf, lceiluv2df, lceilusf, lceiludf): ...This.
(fcvtms): Split as...
(lfloorv2sf, lfloorv4sf, lfloorv2df): ...This.
(fcvtmu): Split as...
(lflooruv2sf, lflooruv4sf, lflooruv2df, lfloorusf, lfloorudf): ...This.
(lfrintnv2sf, lfrintnv4sf, lfrintnv2df, lfrintnsf, lfrintndf): New.
(lfrintnuv2sf, lfrintnuv4sf, lfrintnuv2df): Likewise.
(lfrintnusf, lfrintnudf): Likewise.
* config/aarch64/aarch64-simd.md
(l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2): Convert to
define_insn.
(aarch64_fcvt<frint_suffix><su><mode>): Remove.
* config/aarch64/iterators.md (FCVT): Include UNSPEC_FRINTN.
(fcvt_pattern): Likewise.
From-SVN: r198398
2013-04-29 12:51:46 +02:00
|
|
|
;; Vector versions of the fcvt standard patterns.
|
|
|
|
;; Expands to lbtrunc, lround, lceil, lfloor
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
|
[AARCH64] Add support for vectorizable standard math patterns.
gcc/
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): New.
* config/aarch64/aarch64-protos.h
(aarch64_builtin_vectorized_function): Declare.
* config/aarch64/aarch64-simd-builtins.def (frintz, frintp): Add.
(frintm, frinti, frintx, frinta, fcvtzs, fcvtzu): Likewise.
(fcvtas, fcvtau, fcvtps, fcvtpu, fcvtms, fcvtmu): Likewise.
* config/aarch64/aarch64-simd.md
(aarch64_frint_<frint_suffix><mode>): New.
(<frint_pattern><mode>2): Likewise.
(aarch64_fcvt<frint_suffix><su><mode>): Likewise.
(l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2): Likewise.
* config/aarch64/aarch64.c (TARGET_VECTORIZE_BUILTINS): Define.
(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise.
* config/aarch64/aarch64.md
(btrunc<mode>2, ceil<mode>2, floor<mode>2)
(round<mode>2, rint<mode>2, nearbyint<mode>2): Consolidate as...
(<frint_pattern><mode>2): ...this.
(lceil<su_optab><mode><mode>2, lfloor<su_optab><mode><mode>2)
(lround<su_optab><mode><mode>2)
(lrint<su_optab><mode><mode>2): Consolidate as...
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): ... this.
* config/aarch64/iterators.md (fcvt_target): New.
(FCVT_TARGET): Likewise.
(FRINT): Likewise.
(FCVT): Likewise.
(frint_pattern): Likewise.
(frint_suffix): Likewise.
(fcvt_pattern): Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-rounding-btrunc.c: New test.
* gcc.dg/vect/vect-rounding-btruncf.c: Likewise.
* gcc.dg/vect/vect-rounding-ceil.c: Likewise.
* gcc.dg/vect/vect-rounding-ceilf.c: Likewise.
* gcc.dg/vect/vect-rounding-floor.c: Likewise.
* gcc.dg/vect/vect-rounding-floorf.c: Likewise.
* gcc.dg/vect/vect-rounding-lceil.c: Likewise.
* gcc.dg/vect/vect-rounding-lfloor.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyint.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyintf.c: Likewise.
* gcc.dg/vect/vect-rounding-round.c: Likewise.
* gcc.dg/vect/vect-rounding-roundf.c: Likewise.
* target-supports.exp
(check_effective_target_vect_call_btrunc): New.
(check_effective_target_vect_call_btruncf): Likewise.
(check_effective_target_vect_call_ceil): Likewise.
(check_effective_target_vect_call_ceilf): Likewise.
(check_effective_target_vect_call_floor): Likewise.
(check_effective_target_vect_call_floorf): Likewise.
(check_effective_target_vect_call_lceil): Likewise.
(check_effective_target_vect_call_lfloor): Likewise.
(check_effective_target_vect_call_nearbyint): Likewise.
(check_effective_target_vect_call_nearbyintf): Likewise.
(check_effective_target_vect_call_round): Likewise.
(check_effective_target_vect_call_roundf): Likewise.
From-SVN: r194197
2012-12-05 11:34:31 +01:00
|
|
|
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
|
|
|
|
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(match_operand:VHSDF 1 "register_operand" "w")]
|
[AARCH64] Add support for vectorizable standard math patterns.
gcc/
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): New.
* config/aarch64/aarch64-protos.h
(aarch64_builtin_vectorized_function): Declare.
* config/aarch64/aarch64-simd-builtins.def (frintz, frintp): Add.
(frintm, frinti, frintx, frinta, fcvtzs, fcvtzu): Likewise.
(fcvtas, fcvtau, fcvtps, fcvtpu, fcvtms, fcvtmu): Likewise.
* config/aarch64/aarch64-simd.md
(aarch64_frint_<frint_suffix><mode>): New.
(<frint_pattern><mode>2): Likewise.
(aarch64_fcvt<frint_suffix><su><mode>): Likewise.
(l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2): Likewise.
* config/aarch64/aarch64.c (TARGET_VECTORIZE_BUILTINS): Define.
(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise.
* config/aarch64/aarch64.md
(btrunc<mode>2, ceil<mode>2, floor<mode>2)
(round<mode>2, rint<mode>2, nearbyint<mode>2): Consolidate as...
(<frint_pattern><mode>2): ...this.
(lceil<su_optab><mode><mode>2, lfloor<su_optab><mode><mode>2)
(lround<su_optab><mode><mode>2)
(lrint<su_optab><mode><mode>2): Consolidate as...
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): ... this.
* config/aarch64/iterators.md (fcvt_target): New.
(FCVT_TARGET): Likewise.
(FRINT): Likewise.
(FCVT): Likewise.
(frint_pattern): Likewise.
(frint_suffix): Likewise.
(fcvt_pattern): Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-rounding-btrunc.c: New test.
* gcc.dg/vect/vect-rounding-btruncf.c: Likewise.
* gcc.dg/vect/vect-rounding-ceil.c: Likewise.
* gcc.dg/vect/vect-rounding-ceilf.c: Likewise.
* gcc.dg/vect/vect-rounding-floor.c: Likewise.
* gcc.dg/vect/vect-rounding-floorf.c: Likewise.
* gcc.dg/vect/vect-rounding-lceil.c: Likewise.
* gcc.dg/vect/vect-rounding-lfloor.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyint.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyintf.c: Likewise.
* gcc.dg/vect/vect-rounding-round.c: Likewise.
* gcc.dg/vect/vect-rounding-roundf.c: Likewise.
* target-supports.exp
(check_effective_target_vect_call_btrunc): New.
(check_effective_target_vect_call_btruncf): Likewise.
(check_effective_target_vect_call_ceil): Likewise.
(check_effective_target_vect_call_ceilf): Likewise.
(check_effective_target_vect_call_floor): Likewise.
(check_effective_target_vect_call_floorf): Likewise.
(check_effective_target_vect_call_lceil): Likewise.
(check_effective_target_vect_call_lfloor): Likewise.
(check_effective_target_vect_call_nearbyint): Likewise.
(check_effective_target_vect_call_nearbyintf): Likewise.
(check_effective_target_vect_call_round): Likewise.
(check_effective_target_vect_call_roundf): Likewise.
From-SVN: r194197
2012-12-05 11:34:31 +01:00
|
|
|
FCVT)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set_attr "type" "neon_fp_to_int_<stype><q>")]
|
[AARCH64] Add support for vectorizable standard math patterns.
gcc/
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): New.
* config/aarch64/aarch64-protos.h
(aarch64_builtin_vectorized_function): Declare.
* config/aarch64/aarch64-simd-builtins.def (frintz, frintp): Add.
(frintm, frinti, frintx, frinta, fcvtzs, fcvtzu): Likewise.
(fcvtas, fcvtau, fcvtps, fcvtpu, fcvtms, fcvtmu): Likewise.
* config/aarch64/aarch64-simd.md
(aarch64_frint_<frint_suffix><mode>): New.
(<frint_pattern><mode>2): Likewise.
(aarch64_fcvt<frint_suffix><su><mode>): Likewise.
(l<fcvt_pattern><su_optab><fcvt_target><VDQF:mode>2): Likewise.
* config/aarch64/aarch64.c (TARGET_VECTORIZE_BUILTINS): Define.
(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise.
* config/aarch64/aarch64.md
(btrunc<mode>2, ceil<mode>2, floor<mode>2)
(round<mode>2, rint<mode>2, nearbyint<mode>2): Consolidate as...
(<frint_pattern><mode>2): ...this.
(lceil<su_optab><mode><mode>2, lfloor<su_optab><mode><mode>2)
(lround<su_optab><mode><mode>2)
(lrint<su_optab><mode><mode>2): Consolidate as...
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): ... this.
* config/aarch64/iterators.md (fcvt_target): New.
(FCVT_TARGET): Likewise.
(FRINT): Likewise.
(FCVT): Likewise.
(frint_pattern): Likewise.
(frint_suffix): Likewise.
(fcvt_pattern): Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-rounding-btrunc.c: New test.
* gcc.dg/vect/vect-rounding-btruncf.c: Likewise.
* gcc.dg/vect/vect-rounding-ceil.c: Likewise.
* gcc.dg/vect/vect-rounding-ceilf.c: Likewise.
* gcc.dg/vect/vect-rounding-floor.c: Likewise.
* gcc.dg/vect/vect-rounding-floorf.c: Likewise.
* gcc.dg/vect/vect-rounding-lceil.c: Likewise.
* gcc.dg/vect/vect-rounding-lfloor.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyint.c: Likewise.
* gcc.dg/vect/vect-rounding-nearbyintf.c: Likewise.
* gcc.dg/vect/vect-rounding-round.c: Likewise.
* gcc.dg/vect/vect-rounding-roundf.c: Likewise.
* target-supports.exp
(check_effective_target_vect_call_btrunc): New.
(check_effective_target_vect_call_btruncf): Likewise.
(check_effective_target_vect_call_ceil): Likewise.
(check_effective_target_vect_call_ceilf): Likewise.
(check_effective_target_vect_call_floor): Likewise.
(check_effective_target_vect_call_floorf): Likewise.
(check_effective_target_vect_call_lceil): Likewise.
(check_effective_target_vect_call_lfloor): Likewise.
(check_effective_target_vect_call_nearbyint): Likewise.
(check_effective_target_vect_call_nearbyintf): Likewise.
(check_effective_target_vect_call_round): Likewise.
(check_effective_target_vect_call_roundf): Likewise.
From-SVN: r194197
2012-12-05 11:34:31 +01:00
|
|
|
)
|
|
|
|
|
[AArch64][7/10] ARMv8.2-A FP16 one operand scalar intrinsics
gcc/
* config.gcc (aarch64*-*-*): Install arm_fp16.h.
* config/aarch64/aarch64-builtins.c (hi_UP): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_frsqrte<mode>): Extend to HF
mode.
(aarch64_frecp<FRECP:frecp_suffix><mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.md (<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(abs<mode>2): Likewise.
(<optab><mode>hf2): New pattern for HF mode.
(<optab>hihf2): Likewise.
* config/aarch64/arm_neon.h: Include arm_fp16.h.
* config/aarch64/iterators.md (GPF_F16, GPI_F16, VHSDF_HSDF): New.
(w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE):
Support HF mode.
* config/aarch64/arm_fp16.h: New file.
(vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16,
vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16,
vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16,
vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16,
vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16,
vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16,
vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16,
vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16,
vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16,
vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16,
vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16,
vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16,
vsqrth_f16): New.
From-SVN: r238722
2016-07-25 18:00:28 +02:00
|
|
|
;; HF Scalar variants of related SIMD instructions.
|
|
|
|
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
|
|
|
|
[(set (match_operand:HI 0 "register_operand" "=w")
|
|
|
|
(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
|
|
|
|
FCVT)))]
|
|
|
|
"TARGET_SIMD_F16INST"
|
|
|
|
"fcvt<frint_suffix><su>\t%h0, %h1"
|
|
|
|
[(set_attr "type" "neon_fp_to_int_s")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "<optab>_trunchfhi2"
|
|
|
|
[(set (match_operand:HI 0 "register_operand" "=w")
|
|
|
|
(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD_F16INST"
|
|
|
|
"fcvtz<su>\t%h0, %h1"
|
|
|
|
[(set_attr "type" "neon_fp_to_int_s")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "<optab>hihf2"
|
|
|
|
[(set (match_operand:HF 0 "register_operand" "=w")
|
|
|
|
(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD_F16INST"
|
|
|
|
"<su_optab>cvtf\t%h0, %h1"
|
|
|
|
[(set_attr "type" "neon_int_to_fp_s")]
|
|
|
|
)
|
|
|
|
|
2015-10-20 18:01:53 +02:00
|
|
|
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
|
|
|
|
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
|
|
|
|
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
|
|
|
|
[(mult:VDQF
|
|
|
|
(match_operand:VDQF 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
|
|
|
|
UNSPEC_FRINTZ)))]
|
|
|
|
"TARGET_SIMD
|
|
|
|
&& IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
|
|
|
|
GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
|
|
|
|
{
|
|
|
|
int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
|
|
|
|
char buf[64];
|
|
|
|
snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
|
|
|
|
output_asm_insn (buf, operands);
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
|
|
|
|
)
|
|
|
|
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
|
2013-04-29 13:04:56 +02:00
|
|
|
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
|
|
|
|
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(match_operand:VHSDF 1 "register_operand")]
|
|
|
|
UNSPEC_FRINTZ)))]
|
2013-04-29 13:04:56 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{})
|
|
|
|
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
|
2013-04-29 13:04:56 +02:00
|
|
|
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
|
|
|
|
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(match_operand:VHSDF 1 "register_operand")]
|
|
|
|
UNSPEC_FRINTZ)))]
|
2013-04-29 13:04:56 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{})
|
|
|
|
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
(define_expand "ftrunc<VHSDF:mode>2"
|
|
|
|
[(set (match_operand:VHSDF 0 "register_operand")
|
|
|
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
|
|
|
|
UNSPEC_FRINTZ))]
|
2013-04-29 13:04:56 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{})
|
|
|
|
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
|
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(FLOATUORS:VHSDF
|
2013-04-29 12:54:32 +02:00
|
|
|
(match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set_attr "type" "neon_int_to_fp_<stype><q>")]
|
2013-04-29 12:54:32 +02:00
|
|
|
)
|
|
|
|
|
2013-04-29 12:57:59 +02:00
|
|
|
;; Conversions between vectors of floats and doubles.
|
|
|
|
;; Contains a mix of patterns to match standard pattern names
|
|
|
|
;; and those for intrinsics.
|
|
|
|
|
|
|
|
;; Float widening operations.
|
|
|
|
|
2015-09-08 21:24:35 +02:00
|
|
|
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(float_extend:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSF 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
|
|
|
|
)))]
|
2013-04-29 12:57:59 +02:00
|
|
|
"TARGET_SIMD"
|
2015-09-08 21:24:35 +02:00
|
|
|
"fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_cvt_widen_s")]
|
2013-04-29 12:57:59 +02:00
|
|
|
)
|
|
|
|
|
2016-06-08 12:11:09 +02:00
|
|
|
;; Convert between fixed-point and floating-point (vector modes)
|
|
|
|
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
|
|
|
|
[(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VHSDF:FCVT_TARGET>
|
|
|
|
[(match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:SI 2 "immediate_operand" "i")]
|
2016-06-08 12:11:09 +02:00
|
|
|
FCVT_F2FIXED))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
|
2016-06-08 12:11:09 +02:00
|
|
|
)
|
|
|
|
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
|
|
|
|
[(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VDQ_HSDI:FCVT_TARGET>
|
|
|
|
[(match_operand:VDQ_HSDI 1 "register_operand" "w")
|
|
|
|
(match_operand:SI 2 "immediate_operand" "i")]
|
2016-06-08 12:11:09 +02:00
|
|
|
FCVT_FIXED2F))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
|
2016-06-08 12:11:09 +02:00
|
|
|
)
|
|
|
|
|
2015-09-23 13:39:48 +02:00
|
|
|
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
|
|
|
|
;; is inconsistent with vector ordering elsewhere in the compiler, in that
|
|
|
|
;; the meaning of HI and LO changes depending on the target endianness.
|
|
|
|
;; While elsewhere we map the higher numbered elements of a vector to
|
|
|
|
;; the lower architectural lanes of the vector, for these patterns we want
|
|
|
|
;; to always treat "hi" as referring to the higher architectural lanes.
|
|
|
|
;; Consequently, while the patterns below look inconsistent with our
|
2016-02-12 00:53:54 +01:00
|
|
|
;; other big-endian patterns their behavior is as required.
|
2015-09-23 13:39:48 +02:00
|
|
|
|
2015-09-08 21:24:35 +02:00
|
|
|
(define_expand "vec_unpacks_lo_<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "")
|
|
|
|
(match_operand:VQ_HSF 1 "register_operand" "")]
|
2013-04-29 12:57:59 +02:00
|
|
|
"TARGET_SIMD"
|
2015-09-08 21:24:35 +02:00
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
|
2015-09-08 21:24:35 +02:00
|
|
|
emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
|
|
|
|
operands[1], p));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(float_extend:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSF 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
|
|
|
|
)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_cvt_widen_s")]
|
2013-04-29 12:57:59 +02:00
|
|
|
)
|
|
|
|
|
2015-09-08 21:24:35 +02:00
|
|
|
(define_expand "vec_unpacks_hi_<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "")
|
|
|
|
(match_operand:VQ_HSF 1 "register_operand" "")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2015-09-08 21:24:35 +02:00
|
|
|
emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
|
|
|
|
operands[1], p));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
(define_insn "aarch64_float_extend_lo_<Vwide>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(float_extend:<VWIDE>
|
|
|
|
(match_operand:VDF 1 "register_operand" "w")))]
|
2013-04-29 12:57:59 +02:00
|
|
|
"TARGET_SIMD"
|
2015-09-08 21:24:35 +02:00
|
|
|
"fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_cvt_widen_s")]
|
2013-04-29 12:57:59 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; Float narrowing operations.
|
|
|
|
|
2015-09-08 21:08:34 +02:00
|
|
|
(define_insn "aarch64_float_truncate_lo_<mode>"
|
|
|
|
[(set (match_operand:VDF 0 "register_operand" "=w")
|
|
|
|
(float_truncate:VDF
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")))]
|
2013-04-29 12:57:59 +02:00
|
|
|
"TARGET_SIMD"
|
2015-09-08 21:08:34 +02:00
|
|
|
"fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
|
2013-04-29 12:57:59 +02:00
|
|
|
)
|
|
|
|
|
2015-09-23 13:39:48 +02:00
|
|
|
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
|
2015-09-08 21:08:34 +02:00
|
|
|
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
|
|
|
|
(vec_concat:<VDBL>
|
|
|
|
(match_operand:VDF 1 "register_operand" "0")
|
|
|
|
(float_truncate:VDF
|
|
|
|
(match_operand:<VWIDE> 2 "register_operand" "w"))))]
|
2015-09-23 13:39:48 +02:00
|
|
|
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
|
2015-09-08 21:08:34 +02:00
|
|
|
"fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
|
2013-04-29 12:57:59 +02:00
|
|
|
)
|
|
|
|
|
2015-09-23 13:39:48 +02:00
|
|
|
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
|
|
|
|
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
|
|
|
|
(vec_concat:<VDBL>
|
|
|
|
(float_truncate:VDF
|
|
|
|
(match_operand:<VWIDE> 2 "register_operand" "w"))
|
|
|
|
(match_operand:VDF 1 "register_operand" "0")))]
|
|
|
|
"TARGET_SIMD && BYTES_BIG_ENDIAN"
|
|
|
|
"fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
|
|
|
|
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
|
|
|
|
[(match_operand:<VDBL> 0 "register_operand" "=w")
|
|
|
|
(match_operand:VDF 1 "register_operand" "0")
|
|
|
|
(match_operand:<VWIDE> 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
|
|
|
|
? gen_aarch64_float_truncate_hi_<Vdbl>_be
|
|
|
|
: gen_aarch64_float_truncate_hi_<Vdbl>_le;
|
|
|
|
emit_insn (gen (operands[0], operands[1], operands[2]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2013-04-29 12:57:59 +02:00
|
|
|
(define_expand "vec_pack_trunc_v2df"
|
|
|
|
[(set (match_operand:V4SF 0 "register_operand")
|
|
|
|
(vec_concat:V4SF
|
|
|
|
(float_truncate:V2SF
|
|
|
|
(match_operand:V2DF 1 "register_operand"))
|
|
|
|
(float_truncate:V2SF
|
|
|
|
(match_operand:V2DF 2 "register_operand"))
|
|
|
|
))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (V2SFmode);
|
2013-11-22 16:31:57 +01:00
|
|
|
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
|
|
|
|
int hi = BYTES_BIG_ENDIAN ? 1 : 2;
|
|
|
|
|
|
|
|
emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
|
2013-04-29 12:57:59 +02:00
|
|
|
emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
|
2013-11-22 16:31:57 +01:00
|
|
|
tmp, operands[hi]));
|
2013-04-29 12:57:59 +02:00
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "vec_pack_trunc_df"
|
|
|
|
[(set (match_operand:V2SF 0 "register_operand")
|
|
|
|
(vec_concat:V2SF
|
|
|
|
(float_truncate:SF
|
|
|
|
(match_operand:DF 1 "register_operand"))
|
|
|
|
(float_truncate:SF
|
|
|
|
(match_operand:DF 2 "register_operand"))
|
|
|
|
))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (V2SFmode);
|
2013-11-22 16:31:57 +01:00
|
|
|
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
|
|
|
|
int hi = BYTES_BIG_ENDIAN ? 1 : 2;
|
|
|
|
|
|
|
|
emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
|
|
|
|
emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
|
2013-04-29 12:57:59 +02:00
|
|
|
emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
;; FP Max/Min
|
|
|
|
;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
|
|
|
|
;; expression like:
|
|
|
|
;; a = (b < c) ? b : c;
|
|
|
|
;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
|
|
|
|
;; either explicitly or indirectly via -ffast-math.
|
|
|
|
;;
|
|
|
|
;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
|
|
|
|
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
|
|
|
|
;; operand will be returned when both operands are zero (i.e. they may not
|
|
|
|
;; honour signed zeroes), or when either operand is NaN. Therefore GCC
|
|
|
|
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
|
|
|
|
;; NaNs.
|
|
|
|
|
2013-05-01 17:16:14 +02:00
|
|
|
(define_insn "<su><maxmin><mode>3"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2013-05-01 17:16:14 +02:00
|
|
|
"f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2016-08-02 11:25:19 +02:00
|
|
|
;; Vector forms for fmax, fmin, fmaxnm, fminnm.
|
|
|
|
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
|
|
|
|
;; which implement the IEEE fmax ()/fmin () functions.
|
2013-05-01 17:16:14 +02:00
|
|
|
(define_insn "<maxmin_uns><mode>3"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")]
|
|
|
|
FMAXMIN_UNS))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2013-05-01 17:16:14 +02:00
|
|
|
"<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2013-05-01 17:37:52 +02:00
|
|
|
;; 'across lanes' add.
|
2012-10-23 19:02:30 +02:00
|
|
|
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
(define_expand "reduc_plus_scal_<mode>"
|
|
|
|
[(match_operand:<VEL> 0 "register_operand" "=w")
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
UNSPEC_ADDV)]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
rtx scratch = gen_reg_rtx (<MODE>mode);
|
|
|
|
emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
|
|
|
|
emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2016-06-08 12:17:58 +02:00
|
|
|
(define_insn "aarch64_faddp<mode>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF 2 "register_operand" "w")]
|
|
|
|
UNSPEC_FADDV))]
|
2016-06-08 12:17:58 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
|
2016-06-08 12:17:58 +02:00
|
|
|
)
|
|
|
|
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
(define_insn "aarch64_reduc_plus_internal<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:VDQV 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
UNSPEC_ADDV))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
aarch64-simd.md (vec_pack_trunc_<mode>, [...]): Swap for big-endian.
2013-11-22 Tejas Belagod <tejas.belagod@arm.com>
gcc/
* config/aarch64/aarch64-simd.md (vec_pack_trunc_<mode>,
vec_pack_trunc_v2df, vec_pack_trunc_df): Swap for big-endian.
(reduc_<sur>plus_<mode>): Factorize V2DI into this.
(reduc_<sur>plus_<mode>): Change this to reduc_splus_<mode> for floats
and also change to float UNSPEC.
(reduc_maxmin_uns>_<mode>): Remove V2DI.
* config/aarch64/arm_neon.h (vaddv<q>_<suf><8,16,32,64>,
vmaxv<q>_<suf><8,16,32,64>, vminv<q>_<suf><8,16,32,64>): Fix up scalar
result access for big-endian.
(__LANE0): New macro used to fix up lane access of 'across-lanes'
intrinsics for big-endian.
* config/aarch64/iterators.md (VDQV): Add V2DI.
(VDQV_S): New.
(vp): New mode attribute.
From-SVN: r205269
2013-11-22 16:34:36 +01:00
|
|
|
"add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_reduc_add<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
(define_insn "aarch64_reduc_plus_internalv2si"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:V2SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
UNSPEC_ADDV))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"addp\\t%0.2s, %1.2s, %1.2s"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_reduc_add")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2016-05-18 10:00:33 +02:00
|
|
|
(define_insn "reduc_plus_scal_<mode>"
|
|
|
|
[(set (match_operand:<VEL> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
|
aarch64-simd.md (vec_pack_trunc_<mode>, [...]): Swap for big-endian.
2013-11-22 Tejas Belagod <tejas.belagod@arm.com>
gcc/
* config/aarch64/aarch64-simd.md (vec_pack_trunc_<mode>,
vec_pack_trunc_v2df, vec_pack_trunc_df): Swap for big-endian.
(reduc_<sur>plus_<mode>): Factorize V2DI into this.
(reduc_<sur>plus_<mode>): Change this to reduc_splus_<mode> for floats
and also change to float UNSPEC.
(reduc_maxmin_uns>_<mode>): Remove V2DI.
* config/aarch64/arm_neon.h (vaddv<q>_<suf><8,16,32,64>,
vmaxv<q>_<suf><8,16,32,64>, vminv<q>_<suf><8,16,32,64>): Fix up scalar
result access for big-endian.
(__LANE0): New macro used to fix up lane access of 'across-lanes'
intrinsics for big-endian.
* config/aarch64/iterators.md (VDQV): Add V2DI.
(VDQV_S): New.
(vp): New mode attribute.
From-SVN: r205269
2013-11-22 16:34:36 +01:00
|
|
|
UNSPEC_FADDV))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2013-05-01 17:37:52 +02:00
|
|
|
"faddp\\t%<Vetype>0, %1.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
(define_expand "reduc_plus_scal_v4sf"
|
|
|
|
[(set (match_operand:SF 0 "register_operand")
|
2013-05-01 17:37:52 +02:00
|
|
|
(unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
|
aarch64-simd.md (vec_pack_trunc_<mode>, [...]): Swap for big-endian.
2013-11-22 Tejas Belagod <tejas.belagod@arm.com>
gcc/
* config/aarch64/aarch64-simd.md (vec_pack_trunc_<mode>,
vec_pack_trunc_v2df, vec_pack_trunc_df): Swap for big-endian.
(reduc_<sur>plus_<mode>): Factorize V2DI into this.
(reduc_<sur>plus_<mode>): Change this to reduc_splus_<mode> for floats
and also change to float UNSPEC.
(reduc_maxmin_uns>_<mode>): Remove V2DI.
* config/aarch64/arm_neon.h (vaddv<q>_<suf><8,16,32,64>,
vmaxv<q>_<suf><8,16,32,64>, vminv<q>_<suf><8,16,32,64>): Fix up scalar
result access for big-endian.
(__LANE0): New macro used to fix up lane access of 'across-lanes'
intrinsics for big-endian.
* config/aarch64/iterators.md (VDQV): Add V2DI.
(VDQV_S): New.
(vp): New mode attribute.
From-SVN: r205269
2013-11-22 16:34:36 +01:00
|
|
|
UNSPEC_FADDV))]
|
2013-05-01 17:37:52 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
rtx scratch = gen_reg_rtx (V4SFmode);
|
2016-06-08 12:17:58 +02:00
|
|
|
emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
|
|
|
|
emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
|
[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
* config/aarch64/aarch64-simd-builtins.def
(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
Remove.
(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
(aarch64_reduc_plus_internalv2si): ...this.
(reduc_splus_<mode>/V2F): Rename to...
(aarch64_reduc_plus_internal<mode>): ...this.
* config/aarch64/iterators.md
(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
(UNSPEC_ADDV): New.
(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
From-SVN: r216738
2014-10-27 16:20:18 +01:00
|
|
|
emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
|
2013-05-01 17:37:52 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
aarch64-simd.md (clrsb<mode>2, [...]): New patterns.
* config/aarch64/aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New
patterns.
* config/aarch64/aarch64-simd-builtins.def (clrsb, popcount): New
builtins.
* config/aarch64/arm_neon.h (vcls_s8, vcls_s16, vcls_s32, vclsq_s8,
vclsq_s16, vclsq_s32, vcnt_p8, vcnt_s8, vcnt_u8, vcntq_p8, vcntq_s8,
vcntq_u8): Rewrite using builtin functions.
Co-Authored-By: Shanyao Chen <chenshanyao@huawei.com>
From-SVN: r218464
2014-12-07 16:01:23 +01:00
|
|
|
(define_insn "clrsb<mode>2"
|
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"cls\\t%0.<Vtype>, %1.<Vtype>"
|
|
|
|
[(set_attr "type" "neon_cls<q>")]
|
|
|
|
)
|
|
|
|
|
2013-05-23 15:36:41 +02:00
|
|
|
(define_insn "clz<mode>2"
|
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"clz\\t%0.<Vtype>, %1.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_cls<q>")]
|
2013-05-23 15:36:41 +02:00
|
|
|
)
|
|
|
|
|
aarch64-simd.md (clrsb<mode>2, [...]): New patterns.
* config/aarch64/aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New
patterns.
* config/aarch64/aarch64-simd-builtins.def (clrsb, popcount): New
builtins.
* config/aarch64/arm_neon.h (vcls_s8, vcls_s16, vcls_s32, vclsq_s8,
vclsq_s16, vclsq_s32, vcnt_p8, vcnt_s8, vcnt_u8, vcntq_p8, vcntq_s8,
vcntq_u8): Rewrite using builtin functions.
Co-Authored-By: Shanyao Chen <chenshanyao@huawei.com>
From-SVN: r218464
2014-12-07 16:01:23 +01:00
|
|
|
(define_insn "popcount<mode>2"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"cnt\\t%0.<Vbtype>, %1.<Vbtype>"
|
|
|
|
[(set_attr "type" "neon_cnt<q>")]
|
|
|
|
)
|
|
|
|
|
2013-05-01 17:16:14 +02:00
|
|
|
;; 'across lanes' max and min ops.
|
|
|
|
|
[AArch64] Use new reduc_[us](min|max)_scal optabs, inc. for builtins
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
(reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
* config/aarch64/aarch64-simd.md
(reduc_<maxmin_uns>_<mode>): Rename VDQV_S variant to...
(reduc_<maxmin_uns>_internal<mode>): ...this.
(reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
(reduc_<maxmin_uns>_scal_<mode>): New (*2).
(reduc_<maxmin_uns>_v2si): Combine with below, renaming...
(reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
(reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
* config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
__builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.
From-SVN: r216741
2014-10-27 16:45:16 +01:00
|
|
|
;; Template for outputting a scalar, so we can create __builtins which can be
|
2017-11-22 20:33:31 +01:00
|
|
|
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
|
[AArch64] Use new reduc_[us](min|max)_scal optabs, inc. for builtins
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
(reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
* config/aarch64/aarch64-simd.md
(reduc_<maxmin_uns>_<mode>): Rename VDQV_S variant to...
(reduc_<maxmin_uns>_internal<mode>): ...this.
(reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
(reduc_<maxmin_uns>_scal_<mode>): New (*2).
(reduc_<maxmin_uns>_v2si): Combine with below, renaming...
(reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
(reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
* config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
__builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.
From-SVN: r216741
2014-10-27 16:45:16 +01:00
|
|
|
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
|
|
|
|
[(match_operand:<VEL> 0 "register_operand")
|
2016-07-25 17:00:14 +02:00
|
|
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
|
|
|
|
FMAXMINV)]
|
[AArch64] Use new reduc_[us](min|max)_scal optabs, inc. for builtins
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
(reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
* config/aarch64/aarch64-simd.md
(reduc_<maxmin_uns>_<mode>): Rename VDQV_S variant to...
(reduc_<maxmin_uns>_internal<mode>): ...this.
(reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
(reduc_<maxmin_uns>_scal_<mode>): New (*2).
(reduc_<maxmin_uns>_v2si): Combine with below, renaming...
(reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
(reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
* config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
__builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.
From-SVN: r216741
2014-10-27 16:45:16 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
|
[AArch64] Use new reduc_[us](min|max)_scal optabs, inc. for builtins
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
(reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
* config/aarch64/aarch64-simd.md
(reduc_<maxmin_uns>_<mode>): Rename VDQV_S variant to...
(reduc_<maxmin_uns>_internal<mode>): ...this.
(reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
(reduc_<maxmin_uns>_scal_<mode>): New (*2).
(reduc_<maxmin_uns>_v2si): Combine with below, renaming...
(reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
(reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
* config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
__builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.
From-SVN: r216741
2014-10-27 16:45:16 +01:00
|
|
|
rtx scratch = gen_reg_rtx (<MODE>mode);
|
|
|
|
emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
|
|
|
|
operands[1]));
|
|
|
|
emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
;; Likewise for integer cases, signed and unsigned.
|
|
|
|
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
|
|
|
|
[(match_operand:<VEL> 0 "register_operand")
|
|
|
|
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
|
|
|
|
MAXMINV)]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
|
[AArch64] Use new reduc_[us](min|max)_scal optabs, inc. for builtins
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
(reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
* config/aarch64/aarch64-simd.md
(reduc_<maxmin_uns>_<mode>): Rename VDQV_S variant to...
(reduc_<maxmin_uns>_internal<mode>): ...this.
(reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
(reduc_<maxmin_uns>_scal_<mode>): New (*2).
(reduc_<maxmin_uns>_v2si): Combine with below, renaming...
(reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
(reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
* config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
__builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.
From-SVN: r216741
2014-10-27 16:45:16 +01:00
|
|
|
rtx scratch = gen_reg_rtx (<MODE>mode);
|
|
|
|
emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
|
|
|
|
operands[1]));
|
|
|
|
emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
|
aarch64-simd.md (vec_pack_trunc_<mode>, [...]): Swap for big-endian.
2013-11-22 Tejas Belagod <tejas.belagod@arm.com>
gcc/
* config/aarch64/aarch64-simd.md (vec_pack_trunc_<mode>,
vec_pack_trunc_v2df, vec_pack_trunc_df): Swap for big-endian.
(reduc_<sur>plus_<mode>): Factorize V2DI into this.
(reduc_<sur>plus_<mode>): Change this to reduc_splus_<mode> for floats
and also change to float UNSPEC.
(reduc_maxmin_uns>_<mode>): Remove V2DI.
* config/aarch64/arm_neon.h (vaddv<q>_<suf><8,16,32,64>,
vmaxv<q>_<suf><8,16,32,64>, vminv<q>_<suf><8,16,32,64>): Fix up scalar
result access for big-endian.
(__LANE0): New macro used to fix up lane access of 'across-lanes'
intrinsics for big-endian.
* config/aarch64/iterators.md (VDQV): Add V2DI.
(VDQV_S): New.
(vp): New mode attribute.
From-SVN: r205269
2013-11-22 16:34:36 +01:00
|
|
|
[(set (match_operand:VDQV_S 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
|
2012-10-23 19:02:30 +02:00
|
|
|
MAXMINV))]
|
|
|
|
"TARGET_SIMD"
|
2013-05-01 17:16:14 +02:00
|
|
|
"<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_reduc_minmax<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
[AArch64] Use new reduc_[us](min|max)_scal optabs, inc. for builtins
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
(reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
* config/aarch64/aarch64-simd.md
(reduc_<maxmin_uns>_<mode>): Rename VDQV_S variant to...
(reduc_<maxmin_uns>_internal<mode>): ...this.
(reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
(reduc_<maxmin_uns>_scal_<mode>): New (*2).
(reduc_<maxmin_uns>_v2si): Combine with below, renaming...
(reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
(reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
* config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
__builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.
From-SVN: r216741
2014-10-27 16:45:16 +01:00
|
|
|
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:V2SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
|
|
|
|
MAXMINV))]
|
|
|
|
"TARGET_SIMD"
|
2013-05-01 17:16:14 +02:00
|
|
|
"<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_reduc_minmax")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
[AArch64] Use new reduc_[us](min|max)_scal optabs, inc. for builtins
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
(reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
* config/aarch64/aarch64-simd.md
(reduc_<maxmin_uns>_<mode>): Rename VDQV_S variant to...
(reduc_<maxmin_uns>_internal<mode>): ...this.
(reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
(reduc_<maxmin_uns>_scal_<mode>): New (*2).
(reduc_<maxmin_uns>_v2si): Combine with below, renaming...
(reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
(reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
* config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
__builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.
From-SVN: r216741
2014-10-27 16:45:16 +01:00
|
|
|
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
|
2016-07-25 17:00:14 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
|
|
|
|
FMAXMINV))]
|
2013-05-01 17:16:14 +02:00
|
|
|
"TARGET_SIMD"
|
[AArch64] Use new reduc_[us](min|max)_scal optabs, inc. for builtins
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
(reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
* config/aarch64/aarch64-simd.md
(reduc_<maxmin_uns>_<mode>): Rename VDQV_S variant to...
(reduc_<maxmin_uns>_internal<mode>): ...this.
(reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
(reduc_<maxmin_uns>_scal_<mode>): New (*2).
(reduc_<maxmin_uns>_v2si): Combine with below, renaming...
(reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
(reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
* config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
__builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.
From-SVN: r216741
2014-10-27 16:45:16 +01:00
|
|
|
"<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
|
2016-07-25 17:00:14 +02:00
|
|
|
[(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
|
2013-05-01 17:16:14 +02:00
|
|
|
)
|
|
|
|
|
2013-04-25 18:54:32 +02:00
|
|
|
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
|
|
|
|
;; allocation.
|
|
|
|
;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
|
|
|
|
;; to select.
|
|
|
|
;;
|
|
|
|
;; Thus our BSL is of the form:
|
|
|
|
;; op0 = bsl (mask, op2, op3)
|
2012-10-30 13:31:49 +01:00
|
|
|
;; We can use any of:
|
2013-04-25 18:54:32 +02:00
|
|
|
;;
|
|
|
|
;; if (op0 = mask)
|
|
|
|
;; bsl mask, op1, op2
|
|
|
|
;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
|
|
|
|
;; bit op0, op2, mask
|
|
|
|
;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
|
|
|
|
;; bif op0, op1, mask
|
2015-12-16 16:03:44 +01:00
|
|
|
;;
|
|
|
|
;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
|
|
|
|
;; Some forms of straight-line code may generate the equivalent form
|
|
|
|
;; in *aarch64_simd_bsl<mode>_alt.
|
2012-10-30 13:31:49 +01:00
|
|
|
|
|
|
|
(define_insn "aarch64_simd_bsl<mode>_internal"
|
2017-11-14 15:09:57 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
|
|
|
|
(xor:VDQ_I
|
|
|
|
(and:VDQ_I
|
|
|
|
(xor:VDQ_I
|
2017-08-31 11:52:38 +02:00
|
|
|
(match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
|
2017-11-14 15:09:57 +01:00
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w,w,0"))
|
|
|
|
(match_operand:VDQ_I 1 "register_operand" "0,w,w"))
|
2017-08-31 11:52:38 +02:00
|
|
|
(match_dup:<V_INT_EQUIV> 3)
|
2013-04-25 18:54:32 +02:00
|
|
|
))]
|
2012-10-30 13:31:49 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"@
|
|
|
|
bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
|
|
|
|
bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
|
|
|
|
bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_bsl<q>")]
|
2012-10-30 13:31:49 +01:00
|
|
|
)
|
|
|
|
|
2015-12-16 16:03:44 +01:00
|
|
|
;; We need this form in addition to the above pattern to match the case
|
|
|
|
;; when combine tries merging three insns such that the second operand of
|
|
|
|
;; the outer XOR matches the second operand of the inner XOR rather than
|
|
|
|
;; the first. The two are equivalent but since recog doesn't try all
|
|
|
|
;; permutations of commutative operations, we have to have a separate pattern.
|
|
|
|
|
|
|
|
(define_insn "*aarch64_simd_bsl<mode>_alt"
|
2017-11-14 15:09:57 +01:00
|
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
|
|
|
|
(xor:VDQ_I
|
|
|
|
(and:VDQ_I
|
|
|
|
(xor:VDQ_I
|
|
|
|
(match_operand:VDQ_I 3 "register_operand" "w,w,0")
|
|
|
|
(match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
|
|
|
|
(match_operand:VDQ_I 1 "register_operand" "0,w,w"))
|
|
|
|
(match_dup:<V_INT_EQUIV> 2)))]
|
2015-12-16 16:03:44 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"@
|
|
|
|
bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
|
|
|
|
bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
|
|
|
|
bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
|
|
|
|
[(set_attr "type" "neon_bsl<q>")]
|
|
|
|
)
|
|
|
|
|
2017-11-14 15:09:57 +01:00
|
|
|
;; DImode is special, we want to avoid computing operations which are
|
|
|
|
;; more naturally computed in general purpose registers in the vector
|
|
|
|
;; registers. If we do that, we need to move all three operands from general
|
|
|
|
;; purpose registers to vector registers, then back again. However, we
|
|
|
|
;; don't want to make this pattern an UNSPEC as we'd lose scope for
|
|
|
|
;; optimizations based on the component operations of a BSL.
|
|
|
|
;;
|
|
|
|
;; That means we need a splitter back to the individual operations, if they
|
|
|
|
;; would be better calculated on the integer side.
|
|
|
|
|
|
|
|
(define_insn_and_split "aarch64_simd_bsldi_internal"
|
|
|
|
[(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
|
|
|
|
(xor:DI
|
|
|
|
(and:DI
|
|
|
|
(xor:DI
|
|
|
|
(match_operand:DI 3 "register_operand" "w,0,w,r")
|
|
|
|
(match_operand:DI 2 "register_operand" "w,w,0,r"))
|
|
|
|
(match_operand:DI 1 "register_operand" "0,w,w,r"))
|
|
|
|
(match_dup:DI 3)
|
|
|
|
))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"@
|
|
|
|
bsl\\t%0.8b, %2.8b, %3.8b
|
|
|
|
bit\\t%0.8b, %2.8b, %1.8b
|
|
|
|
bif\\t%0.8b, %3.8b, %1.8b
|
|
|
|
#"
|
2017-12-05 15:40:37 +01:00
|
|
|
"&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
|
2017-11-14 15:09:57 +01:00
|
|
|
[(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
|
|
|
|
{
|
|
|
|
/* Split back to individual operations. If we're before reload, and
|
|
|
|
able to create a temporary register, do so. If we're after reload,
|
|
|
|
we've got an early-clobber destination register, so use that.
|
|
|
|
Otherwise, we can't create pseudos and we can't yet guarantee that
|
|
|
|
operands[0] is safe to write, so FAIL to split. */
|
|
|
|
|
|
|
|
rtx scratch;
|
|
|
|
if (reload_completed)
|
|
|
|
scratch = operands[0];
|
|
|
|
else if (can_create_pseudo_p ())
|
|
|
|
scratch = gen_reg_rtx (DImode);
|
|
|
|
else
|
|
|
|
FAIL;
|
|
|
|
|
|
|
|
emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
|
|
|
|
emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
|
|
|
|
emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
|
|
|
|
(set_attr "length" "4,4,4,12")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn_and_split "aarch64_simd_bsldi_alt"
|
|
|
|
[(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
|
|
|
|
(xor:DI
|
|
|
|
(and:DI
|
|
|
|
(xor:DI
|
|
|
|
(match_operand:DI 3 "register_operand" "w,w,0,r")
|
|
|
|
(match_operand:DI 2 "register_operand" "w,0,w,r"))
|
|
|
|
(match_operand:DI 1 "register_operand" "0,w,w,r"))
|
|
|
|
(match_dup:DI 2)
|
|
|
|
))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"@
|
|
|
|
bsl\\t%0.8b, %3.8b, %2.8b
|
|
|
|
bit\\t%0.8b, %3.8b, %1.8b
|
|
|
|
bif\\t%0.8b, %2.8b, %1.8b
|
|
|
|
#"
|
2017-12-05 15:40:37 +01:00
|
|
|
"&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
|
2017-11-14 15:09:57 +01:00
|
|
|
[(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
|
|
|
|
{
|
|
|
|
/* Split back to individual operations. If we're before reload, and
|
|
|
|
able to create a temporary register, do so. If we're after reload,
|
|
|
|
we've got an early-clobber destination register, so use that.
|
|
|
|
Otherwise, we can't create pseudos and we can't yet guarantee that
|
|
|
|
operands[0] is safe to write, so FAIL to split. */
|
|
|
|
|
|
|
|
rtx scratch;
|
|
|
|
if (reload_completed)
|
|
|
|
scratch = operands[0];
|
|
|
|
else if (can_create_pseudo_p ())
|
|
|
|
scratch = gen_reg_rtx (DImode);
|
|
|
|
else
|
|
|
|
FAIL;
|
|
|
|
|
|
|
|
emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
|
|
|
|
emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
|
|
|
|
emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
|
|
|
|
(set_attr "length" "4,4,4,12")]
|
|
|
|
)
|
|
|
|
|
2012-10-30 13:31:49 +01:00
|
|
|
(define_expand "aarch64_simd_bsl<mode>"
|
2013-11-26 11:03:14 +01:00
|
|
|
[(match_operand:VALLDIF 0 "register_operand")
|
2017-08-31 11:52:38 +02:00
|
|
|
(match_operand:<V_INT_EQUIV> 1 "register_operand")
|
2013-11-26 11:03:14 +01:00
|
|
|
(match_operand:VALLDIF 2 "register_operand")
|
|
|
|
(match_operand:VALLDIF 3 "register_operand")]
|
2013-04-25 18:54:32 +02:00
|
|
|
"TARGET_SIMD"
|
2012-10-30 13:31:49 +01:00
|
|
|
{
|
|
|
|
/* We can't alias operands together if they have different modes. */
|
2014-11-11 18:37:35 +01:00
|
|
|
rtx tmp = operands[0];
|
|
|
|
if (FLOAT_MODE_P (<MODE>mode))
|
|
|
|
{
|
2017-08-31 11:52:38 +02:00
|
|
|
operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
|
|
|
|
operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
|
|
|
|
tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
|
2014-11-11 18:37:35 +01:00
|
|
|
}
|
2017-08-31 11:52:38 +02:00
|
|
|
operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
|
|
|
|
emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
|
|
|
|
operands[1],
|
|
|
|
operands[2],
|
|
|
|
operands[3]));
|
2014-11-11 18:37:35 +01:00
|
|
|
if (tmp != operands[0])
|
|
|
|
emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
|
|
|
|
|
2013-04-25 18:54:32 +02:00
|
|
|
DONE;
|
2012-10-30 13:31:49 +01:00
|
|
|
})
|
|
|
|
|
2017-08-31 11:52:38 +02:00
|
|
|
(define_expand "vcond_mask_<mode><v_int_equiv>"
|
2016-08-10 17:26:14 +02:00
|
|
|
[(match_operand:VALLDI 0 "register_operand")
|
|
|
|
(match_operand:VALLDI 1 "nonmemory_operand")
|
|
|
|
(match_operand:VALLDI 2 "nonmemory_operand")
|
2017-08-31 11:52:38 +02:00
|
|
|
(match_operand:<V_INT_EQUIV> 3 "register_operand")]
|
2016-08-10 17:26:14 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
/* If we have (a = (P) ? -1 : 0);
|
|
|
|
Then we can simply move the generated mask (result must be int). */
|
|
|
|
if (operands[1] == CONSTM1_RTX (<MODE>mode)
|
|
|
|
&& operands[2] == CONST0_RTX (<MODE>mode))
|
|
|
|
emit_move_insn (operands[0], operands[3]);
|
|
|
|
/* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
|
|
|
|
else if (operands[1] == CONST0_RTX (<MODE>mode)
|
|
|
|
&& operands[2] == CONSTM1_RTX (<MODE>mode))
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
|
2016-08-10 17:26:14 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
if (!REG_P (operands[1]))
|
|
|
|
operands[1] = force_reg (<MODE>mode, operands[1]);
|
|
|
|
if (!REG_P (operands[2]))
|
|
|
|
operands[2] = force_reg (<MODE>mode, operands[2]);
|
|
|
|
emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
|
|
|
|
operands[1], operands[2]));
|
|
|
|
}
|
|
|
|
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; Patterns comparing two vectors to produce a mask.
|
|
|
|
|
|
|
|
(define_expand "vec_cmp<mode><mode>"
|
|
|
|
[(set (match_operand:VSDQ_I_DI 0 "register_operand")
|
|
|
|
(match_operator 1 "comparison_operator"
|
|
|
|
[(match_operand:VSDQ_I_DI 2 "register_operand")
|
|
|
|
(match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
rtx mask = operands[0];
|
|
|
|
enum rtx_code code = GET_CODE (operands[1]);
|
|
|
|
|
|
|
|
switch (code)
|
|
|
|
{
|
|
|
|
case NE:
|
|
|
|
case LE:
|
|
|
|
case LT:
|
|
|
|
case GE:
|
|
|
|
case GT:
|
|
|
|
case EQ:
|
|
|
|
if (operands[3] == CONST0_RTX (<MODE>mode))
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Fall through. */
|
|
|
|
default:
|
|
|
|
if (!REG_P (operands[3]))
|
|
|
|
operands[3] = force_reg (<MODE>mode, operands[3]);
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (code)
|
|
|
|
{
|
|
|
|
case LT:
|
|
|
|
emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case GE:
|
|
|
|
emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case LE:
|
|
|
|
emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case GT:
|
|
|
|
emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case LTU:
|
|
|
|
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case GEU:
|
|
|
|
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case LEU:
|
|
|
|
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case GTU:
|
|
|
|
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case NE:
|
|
|
|
/* Handle NE as !EQ. */
|
|
|
|
emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
|
2016-08-10 17:26:14 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
case EQ:
|
|
|
|
emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
|
|
|
}
|
|
|
|
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2017-08-31 11:52:38 +02:00
|
|
|
(define_expand "vec_cmp<mode><v_int_equiv>"
|
|
|
|
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
|
2016-08-10 17:26:14 +02:00
|
|
|
(match_operator 1 "comparison_operator"
|
|
|
|
[(match_operand:VDQF 2 "register_operand")
|
|
|
|
(match_operand:VDQF 3 "nonmemory_operand")]))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
int use_zero_form = 0;
|
|
|
|
enum rtx_code code = GET_CODE (operands[1]);
|
2017-08-31 11:52:38 +02:00
|
|
|
rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
|
2016-08-10 17:26:14 +02:00
|
|
|
|
2016-08-11 12:01:03 +02:00
|
|
|
rtx (*comparison) (rtx, rtx, rtx) = NULL;
|
2016-08-10 17:26:14 +02:00
|
|
|
|
|
|
|
switch (code)
|
|
|
|
{
|
|
|
|
case LE:
|
|
|
|
case LT:
|
|
|
|
case GE:
|
|
|
|
case GT:
|
|
|
|
case EQ:
|
|
|
|
if (operands[3] == CONST0_RTX (<MODE>mode))
|
|
|
|
{
|
|
|
|
use_zero_form = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Fall through. */
|
|
|
|
default:
|
|
|
|
if (!REG_P (operands[3]))
|
|
|
|
operands[3] = force_reg (<MODE>mode, operands[3]);
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (code)
|
|
|
|
{
|
|
|
|
case LT:
|
|
|
|
if (use_zero_form)
|
|
|
|
{
|
|
|
|
comparison = gen_aarch64_cmlt<mode>;
|
|
|
|
break;
|
|
|
|
}
|
2016-09-30 13:49:32 +02:00
|
|
|
/* Fall through. */
|
2016-08-10 17:26:14 +02:00
|
|
|
case UNGE:
|
|
|
|
std::swap (operands[2], operands[3]);
|
|
|
|
/* Fall through. */
|
|
|
|
case UNLE:
|
|
|
|
case GT:
|
|
|
|
comparison = gen_aarch64_cmgt<mode>;
|
|
|
|
break;
|
|
|
|
case LE:
|
|
|
|
if (use_zero_form)
|
|
|
|
{
|
|
|
|
comparison = gen_aarch64_cmle<mode>;
|
|
|
|
break;
|
|
|
|
}
|
2016-09-30 13:49:32 +02:00
|
|
|
/* Fall through. */
|
2016-08-10 17:26:14 +02:00
|
|
|
case UNGT:
|
|
|
|
std::swap (operands[2], operands[3]);
|
|
|
|
/* Fall through. */
|
|
|
|
case UNLT:
|
|
|
|
case GE:
|
|
|
|
comparison = gen_aarch64_cmge<mode>;
|
|
|
|
break;
|
|
|
|
case NE:
|
|
|
|
case EQ:
|
|
|
|
comparison = gen_aarch64_cmeq<mode>;
|
|
|
|
break;
|
|
|
|
case UNEQ:
|
|
|
|
case ORDERED:
|
|
|
|
case UNORDERED:
|
2017-12-14 11:35:38 +01:00
|
|
|
case LTGT:
|
2016-08-10 17:26:14 +02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (code)
|
|
|
|
{
|
|
|
|
case UNGE:
|
|
|
|
case UNGT:
|
|
|
|
case UNLE:
|
|
|
|
case UNLT:
|
|
|
|
case NE:
|
|
|
|
/* FCM returns false for lanes which are unordered, so if we use
|
|
|
|
the inverse of the comparison we actually want to emit, then
|
|
|
|
invert the result, we will end up with the correct result.
|
|
|
|
Note that a NE NaN and NaN NE b are true for all a, b.
|
|
|
|
|
|
|
|
Our transformations are:
|
|
|
|
a UNGE b -> !(b GT a)
|
|
|
|
a UNGT b -> !(b GE a)
|
|
|
|
a UNLE b -> !(a GT b)
|
|
|
|
a UNLT b -> !(a GE b)
|
|
|
|
a NE b -> !(a EQ b) */
|
2016-08-11 12:01:03 +02:00
|
|
|
gcc_assert (comparison != NULL);
|
2016-08-10 17:26:14 +02:00
|
|
|
emit_insn (comparison (operands[0], operands[2], operands[3]));
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
|
2016-08-10 17:26:14 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
case LT:
|
|
|
|
case LE:
|
|
|
|
case GT:
|
|
|
|
case GE:
|
|
|
|
case EQ:
|
|
|
|
/* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
|
|
|
|
As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
|
|
|
|
a GE b -> a GE b
|
|
|
|
a GT b -> a GT b
|
|
|
|
a LE b -> b GE a
|
|
|
|
a LT b -> b GT a
|
|
|
|
a EQ b -> a EQ b */
|
2016-08-11 12:01:03 +02:00
|
|
|
gcc_assert (comparison != NULL);
|
2016-08-10 17:26:14 +02:00
|
|
|
emit_insn (comparison (operands[0], operands[2], operands[3]));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case UNEQ:
|
|
|
|
/* We first check (a > b || b > a) which is !UNEQ, inverting
|
|
|
|
this result will then give us (a == b || a UNORDERED b). */
|
|
|
|
emit_insn (gen_aarch64_cmgt<mode> (operands[0],
|
|
|
|
operands[2], operands[3]));
|
|
|
|
emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
|
|
|
|
emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
|
2016-08-10 17:26:14 +02:00
|
|
|
break;
|
|
|
|
|
2017-12-14 11:35:38 +01:00
|
|
|
case LTGT:
|
|
|
|
/* LTGT is not guranteed to not generate a FP exception. So let's
|
|
|
|
go the faster way : ((a > b) || (b > a)). */
|
|
|
|
emit_insn (gen_aarch64_cmgt<mode> (operands[0],
|
|
|
|
operands[2], operands[3]));
|
|
|
|
emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
|
|
|
|
emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
|
|
|
|
break;
|
|
|
|
|
2016-08-10 17:26:14 +02:00
|
|
|
case UNORDERED:
|
|
|
|
/* Operands are ORDERED iff (a > b || b >= a), so we can compute
|
|
|
|
UNORDERED as !ORDERED. */
|
|
|
|
emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
|
|
|
|
emit_insn (gen_aarch64_cmge<mode> (operands[0],
|
|
|
|
operands[3], operands[2]));
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
|
|
|
|
emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
|
2016-08-10 17:26:14 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
case ORDERED:
|
|
|
|
emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
|
|
|
|
emit_insn (gen_aarch64_cmge<mode> (operands[0],
|
|
|
|
operands[3], operands[2]));
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
|
2016-08-10 17:26:14 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
|
|
|
}
|
|
|
|
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "vec_cmpu<mode><mode>"
|
|
|
|
[(set (match_operand:VSDQ_I_DI 0 "register_operand")
|
|
|
|
(match_operator 1 "comparison_operator"
|
|
|
|
[(match_operand:VSDQ_I_DI 2 "register_operand")
|
|
|
|
(match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
|
|
|
|
operands[2], operands[3]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2012-10-30 13:31:49 +01:00
|
|
|
(define_expand "vcond<mode><mode>"
|
2015-05-08 13:49:02 +02:00
|
|
|
[(set (match_operand:VALLDI 0 "register_operand")
|
|
|
|
(if_then_else:VALLDI
|
2012-10-30 13:31:49 +01:00
|
|
|
(match_operator 3 "comparison_operator"
|
2015-05-08 13:49:02 +02:00
|
|
|
[(match_operand:VALLDI 4 "register_operand")
|
|
|
|
(match_operand:VALLDI 5 "nonmemory_operand")])
|
|
|
|
(match_operand:VALLDI 1 "nonmemory_operand")
|
|
|
|
(match_operand:VALLDI 2 "nonmemory_operand")))]
|
2012-10-30 13:31:49 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2017-08-31 11:52:38 +02:00
|
|
|
rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
|
2016-08-16 15:09:40 +02:00
|
|
|
enum rtx_code code = GET_CODE (operands[3]);
|
2016-08-10 17:34:23 +02:00
|
|
|
|
2016-08-16 15:09:40 +02:00
|
|
|
/* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
|
|
|
|
it as well as switch operands 1/2 in order to avoid the additional
|
|
|
|
NOT instruction. */
|
|
|
|
if (code == NE)
|
|
|
|
{
|
|
|
|
operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
|
|
|
|
operands[4], operands[5]);
|
|
|
|
std::swap (operands[1], operands[2]);
|
|
|
|
}
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
|
|
|
|
operands[4], operands[5]));
|
|
|
|
emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
|
|
|
|
operands[2], mask));
|
2016-08-10 17:34:23 +02:00
|
|
|
|
2012-10-30 13:31:49 +01:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2016-08-10 17:34:23 +02:00
|
|
|
(define_expand "vcond<v_cmp_mixed><mode>"
|
|
|
|
[(set (match_operand:<V_cmp_mixed> 0 "register_operand")
|
|
|
|
(if_then_else:<V_cmp_mixed>
|
2013-05-01 12:40:23 +02:00
|
|
|
(match_operator 3 "comparison_operator"
|
2016-08-10 17:34:23 +02:00
|
|
|
[(match_operand:VDQF_COND 4 "register_operand")
|
|
|
|
(match_operand:VDQF_COND 5 "nonmemory_operand")])
|
|
|
|
(match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
|
|
|
|
(match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
|
2013-05-01 12:40:23 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2017-08-31 11:52:38 +02:00
|
|
|
rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
|
2016-08-16 15:09:40 +02:00
|
|
|
enum rtx_code code = GET_CODE (operands[3]);
|
2016-08-10 17:34:23 +02:00
|
|
|
|
2016-08-16 15:09:40 +02:00
|
|
|
/* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
|
|
|
|
it as well as switch operands 1/2 in order to avoid the additional
|
|
|
|
NOT instruction. */
|
|
|
|
if (code == NE)
|
|
|
|
{
|
|
|
|
operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
|
|
|
|
operands[4], operands[5]);
|
|
|
|
std::swap (operands[1], operands[2]);
|
|
|
|
}
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
|
|
|
|
operands[4], operands[5]));
|
|
|
|
emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
|
2013-05-01 12:40:23 +02:00
|
|
|
operands[0], operands[1],
|
2016-08-10 17:34:23 +02:00
|
|
|
operands[2], mask));
|
|
|
|
|
2013-05-01 12:40:23 +02:00
|
|
|
DONE;
|
|
|
|
})
|
2012-10-30 13:31:49 +01:00
|
|
|
|
|
|
|
(define_expand "vcondu<mode><mode>"
|
2015-05-08 13:49:02 +02:00
|
|
|
[(set (match_operand:VSDQ_I_DI 0 "register_operand")
|
|
|
|
(if_then_else:VSDQ_I_DI
|
2012-10-30 13:31:49 +01:00
|
|
|
(match_operator 3 "comparison_operator"
|
2015-05-08 13:49:02 +02:00
|
|
|
[(match_operand:VSDQ_I_DI 4 "register_operand")
|
|
|
|
(match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
|
|
|
|
(match_operand:VSDQ_I_DI 1 "nonmemory_operand")
|
|
|
|
(match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
|
2012-10-30 13:31:49 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2016-08-10 17:34:23 +02:00
|
|
|
rtx mask = gen_reg_rtx (<MODE>mode);
|
2016-08-16 15:09:40 +02:00
|
|
|
enum rtx_code code = GET_CODE (operands[3]);
|
2016-08-10 17:34:23 +02:00
|
|
|
|
2016-08-16 15:09:40 +02:00
|
|
|
/* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
|
|
|
|
it as well as switch operands 1/2 in order to avoid the additional
|
|
|
|
NOT instruction. */
|
|
|
|
if (code == NE)
|
|
|
|
{
|
|
|
|
operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
|
|
|
|
operands[4], operands[5]);
|
|
|
|
std::swap (operands[1], operands[2]);
|
|
|
|
}
|
2016-08-10 17:34:23 +02:00
|
|
|
emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
|
|
|
|
operands[4], operands[5]));
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
|
|
|
|
operands[2], mask));
|
2016-08-10 17:34:23 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "vcondu<mode><v_cmp_mixed>"
|
|
|
|
[(set (match_operand:VDQF 0 "register_operand")
|
|
|
|
(if_then_else:VDQF
|
|
|
|
(match_operator 3 "comparison_operator"
|
|
|
|
[(match_operand:<V_cmp_mixed> 4 "register_operand")
|
|
|
|
(match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
|
|
|
|
(match_operand:VDQF 1 "nonmemory_operand")
|
|
|
|
(match_operand:VDQF 2 "nonmemory_operand")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2017-08-31 11:52:38 +02:00
|
|
|
rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
|
2016-08-16 15:09:40 +02:00
|
|
|
enum rtx_code code = GET_CODE (operands[3]);
|
2016-08-10 17:34:23 +02:00
|
|
|
|
2016-08-16 15:09:40 +02:00
|
|
|
/* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
|
|
|
|
it as well as switch operands 1/2 in order to avoid the additional
|
|
|
|
NOT instruction. */
|
|
|
|
if (code == NE)
|
|
|
|
{
|
|
|
|
operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
|
|
|
|
operands[4], operands[5]);
|
|
|
|
std::swap (operands[1], operands[2]);
|
|
|
|
}
|
2016-08-10 17:34:23 +02:00
|
|
|
emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
|
|
|
|
mask, operands[3],
|
|
|
|
operands[4], operands[5]));
|
2017-08-31 11:52:38 +02:00
|
|
|
emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
|
|
|
|
operands[2], mask));
|
2012-10-30 13:31:49 +01:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
;; Patterns for AArch64 SIMD Intrinsics.
|
|
|
|
|
2013-08-09 11:28:51 +02:00
|
|
|
;; Lane extraction with sign extension to general purpose register.
|
|
|
|
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
|
|
|
|
[(set (match_operand:GPI 0 "register_operand" "=r")
|
|
|
|
(sign_extend:GPI
|
2012-10-23 19:02:30 +02:00
|
|
|
(vec_select:<VEL>
|
2013-08-09 11:28:51 +02:00
|
|
|
(match_operand:VDQQH 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
|
|
|
|
"TARGET_SIMD"
|
2013-11-22 16:29:19 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2013-11-22 16:29:19 +01:00
|
|
|
return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_to_gp<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2013-08-09 11:28:51 +02:00
|
|
|
(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
|
|
|
|
[(set (match_operand:SI 0 "register_operand" "=r")
|
|
|
|
(zero_extend:SI
|
2012-10-23 19:02:30 +02:00
|
|
|
(vec_select:<VEL>
|
2013-08-09 11:28:51 +02:00
|
|
|
(match_operand:VDQQH 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
|
|
|
|
"TARGET_SIMD"
|
2013-11-22 16:29:19 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2013-11-22 16:29:19 +01:00
|
|
|
return "umov\\t%w0, %1.<Vetype>[%2]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_to_gp<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2013-08-09 11:28:51 +02:00
|
|
|
;; Lane extraction of a value, neither sign nor zero extension
|
|
|
|
;; is guaranteed so upper bits should be considered undefined.
|
[AArch64]Remove be_checked_get_lane, check bounds with __builtin_aarch64_im_lane_boundsi.
gcc/:
PR target/63870
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane):
Delete.
* config/aarch64/aarch64-simd.md (aarch64_be_checked_get_lane<mode\>):
Delete.
* config/aarch64/arm_neon.h (aarch64_vget_lane_any): Use GCC
vector extensions, __aarch64_lane, __builtin_aarch64_im_lane_boundsi.
(__aarch64_vget_lane_f32, __aarch64_vget_lane_f64,
__aarch64_vget_lane_p8, __aarch64_vget_lane_p16,
__aarch64_vget_lane_s8, __aarch64_vget_lane_s16,
__aarch64_vget_lane_s32, __aarch64_vget_lane_s64,
__aarch64_vget_lane_u8, __aarch64_vget_lane_u16,
__aarch64_vget_lane_u32, __aarch64_vget_lane_u64,
__aarch64_vgetq_lane_f32, __aarch64_vgetq_lane_f64,
__aarch64_vgetq_lane_p8, __aarch64_vgetq_lane_p16,
__aarch64_vgetq_lane_s8, __aarch64_vgetq_lane_s16,
__aarch64_vgetq_lane_s32, __aarch64_vgetq_lane_s64,
__aarch64_vgetq_lane_u8, __aarch64_vgetq_lane_u16,
__aarch64_vgetq_lane_u32, __aarch64_vgetq_lane_u64): Delete.
(__aarch64_vdup_lane_any): Use __aarch64_vget_lane_any, remove
'q2' argument.
(__aarch64_vdup_lane_f32, __aarch64_vdup_lane_f64,
__aarch64_vdup_lane_p8, __aarch64_vdup_lane_p16,
__aarch64_vdup_lane_s8, __aarch64_vdup_lane_s16,
__aarch64_vdup_lane_s32, __aarch64_vdup_lane_s64,
__aarch64_vdup_lane_u8, __aarch64_vdup_lane_u16,
__aarch64_vdup_lane_u32, __aarch64_vdup_lane_u64,
__aarch64_vdup_laneq_f32, __aarch64_vdup_laneq_f64,
__aarch64_vdup_laneq_p8, __aarch64_vdup_laneq_p16,
__aarch64_vdup_laneq_s8, __aarch64_vdup_laneq_s16,
__aarch64_vdup_laneq_s32, __aarch64_vdup_laneq_s64,
__aarch64_vdup_laneq_u8, __aarch64_vdup_laneq_u16,
__aarch64_vdup_laneq_u32, __aarch64_vdup_laneq_u64): Remove argument
to __aarch64_vdup_lane_any.
(vget_lane_f32, vget_lane_f64, vget_lane_p8, vget_lane_p16,
vget_lane_s8, vget_lane_s16, vget_lane_s32, vget_lane_s64,
vget_lane_u8, vget_lane_u16, vget_lane_u32, vget_lane_u64,
vgetq_lane_f32, vgetq_lane_f64, vgetq_lane_p8, vgetq_lane_p16,
vgetq_lane_s8, vgetq_lane_s16, vgetq_lane_s32, vgetq_lane_s64,
vgetq_lane_u8, vgetq_lane_u16, vgetq_lane_u32, vgetq_lane_u64,
vdupb_lane_p8, vdupb_lane_s8, vdupb_lane_u8, vduph_lane_p16,
vduph_lane_s16, vduph_lane_u16, vdups_lane_f32, vdups_lane_s32,
vdups_lane_u32, vdupb_laneq_p8, vdupb_laneq_s8, vdupb_laneq_u8,
vduph_laneq_p16, vduph_laneq_s16, vduph_laneq_u16, vdups_laneq_f32,
vdups_laneq_s32, vdups_laneq_u32, vdupd_laneq_f64, vdupd_laneq_s64,
vdupd_laneq_u64, vfmas_lane_f32, vfma_laneq_f64, vfmad_laneq_f64,
vfmas_laneq_f32, vfmss_lane_f32, vfms_laneq_f64, vfmsd_laneq_f64,
vfmss_laneq_f32, vmla_lane_f32, vmla_lane_s16, vmla_lane_s32,
vmla_lane_u16, vmla_lane_u32, vmla_laneq_f32, vmla_laneq_s16,
vmla_laneq_s32, vmla_laneq_u16, vmla_laneq_u32, vmlaq_lane_f32,
vmlaq_lane_s16, vmlaq_lane_s32, vmlaq_lane_u16, vmlaq_lane_u32,
vmlaq_laneq_f32, vmlaq_laneq_s16, vmlaq_laneq_s32, vmlaq_laneq_u16,
vmlaq_laneq_u32, vmls_lane_f32, vmls_lane_s16, vmls_lane_s32,
vmls_lane_u16, vmls_lane_u32, vmls_laneq_f32, vmls_laneq_s16,
vmls_laneq_s32, vmls_laneq_u16, vmls_laneq_u32, vmlsq_lane_f32,
vmlsq_lane_s16, vmlsq_lane_s32, vmlsq_lane_u16, vmlsq_lane_u32,
vmlsq_laneq_f32, vmlsq_laneq_s16, vmlsq_laneq_s32, vmlsq_laneq_u16,
vmlsq_laneq_u32, vmul_lane_f32, vmul_lane_s16, vmul_lane_s32,
vmul_lane_u16, vmul_lane_u32, vmuld_lane_f64, vmuld_laneq_f64,
vmuls_lane_f32, vmuls_laneq_f32, vmul_laneq_f32, vmul_laneq_f64,
vmul_laneq_s16, vmul_laneq_s32, vmul_laneq_u16, vmul_laneq_u32,
vmulq_lane_f32, vmulq_lane_s16, vmulq_lane_s32, vmulq_lane_u16,
vmulq_lane_u32, vmulq_laneq_f32, vmulq_laneq_f64, vmulq_laneq_s16,
vmulq_laneq_s32, vmulq_laneq_u16, vmulq_laneq_u32) : Use
__aarch64_vget_lane_any.
gcc/testsuite/:
* gcc.target/aarch64/simd/vget_lane_f32_indices_1.c: New test.
* gcc.target/aarch64/simd/vget_lane_f64_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_p16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_p8_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_s64_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_s8_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_u16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_u32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_u64_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vget_lane_u8_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_f32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_f64_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_p16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_p8_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_s64_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_s8_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_u16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_u32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_u64_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vgetq_lane_u8_indices_1.c: Likewise.
From-SVN: r218536
2014-12-09 21:23:36 +01:00
|
|
|
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "aarch64_get_lane<mode>"
|
2013-11-22 16:29:19 +01:00
|
|
|
[(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
|
2012-10-23 19:02:30 +02:00
|
|
|
(vec_select:<VEL>
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
(match_operand:VALL_F16 1 "register_operand" "w, w, w")
|
2013-11-22 16:29:19 +01:00
|
|
|
(parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2013-11-22 16:29:19 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2013-11-22 16:29:19 +01:00
|
|
|
switch (which_alternative)
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
|
|
|
|
case 1:
|
|
|
|
return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
|
|
|
|
case 2:
|
|
|
|
return "st1\\t{%1.<Vetype>}[%2], %0";
|
|
|
|
default:
|
|
|
|
gcc_unreachable ();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
vec_merge + vec_duplicate + vec_concat simplification
Another vec_merge simplification that's missing is transforming:
(vec_merge (vec_duplicate x) (vec_concat (y) (z)) (const_int N))
into
(vec_concat x z) if N == 1 (0b01) or
(vec_concat y x) if N == 2 (0b10)
For the testcase in this patch on aarch64 this allows us to try matching during combine the pattern:
(set (reg:V2DI 78 [ x ])
(vec_concat:V2DI
(mem:DI (reg/v/f:DI 76 [ y ]) [1 *y_4(D)+0 S8 A64])
(mem:DI (plus:DI (reg/v/f:DI 76 [ y ])
(const_int 8 [0x8])) [1 MEM[(long long int *)y_4(D) + 8B]+0 S8 A64])))
rather than the more complex:
(set (reg:V2DI 78 [ x ])
(vec_merge:V2DI (vec_duplicate:V2DI (mem:DI (plus:DI (reg/v/f:DI 76 [ y ])
(const_int 8 [0x8])) [1 MEM[(long long int *)y_4(D) + 8B]+0 S8 A64]))
(vec_duplicate:V2DI (mem:DI (reg/v/f:DI 76 [ y ]) [1 *y_4(D)+0 S8 A64]))
(const_int 2 [0x2])))
We don't actually have an aarch64 pattern for the simplified version above, but it's a simple enough
form to add, so this patch adds such a pattern that performs a concatenated load of two 64-bit vectors
in adjacent memory locations as a single Q-register LDR. The new aarch64 pattern is needed to demonstrate
the effectiveness of the simplify-rtx change, so I've kept them together as one patch.
Now for the testcase in the patch we can generate:
construct_lanedi:
ldr q0, [x0]
ret
construct_lanedf:
ldr q0, [x0]
ret
instead of:
construct_lanedi:
ld1r {v0.2d}, [x0]
ldr x0, [x0, 8]
ins v0.d[1], x0
ret
construct_lanedf:
ld1r {v0.2d}, [x0]
ldr d1, [x0, 8]
ins v0.d[1], v1.d[0]
ret
The new memory constraint Utq is needed because we need to allow only the Q-register addressing modes but
the MEM expressions in the RTL pattern have 64-bit vector modes, and if we don't constrain them they will
allow the D-register addressing modes during register allocation/address mode selection, which will produce
invalid assembly.
Bootstrapped and tested on aarch64-none-linux-gnu.
* simplify-rtx.c (simplify_ternary_operation, VEC_MERGE):
Simplify vec_merge of vec_duplicate and vec_concat.
* config/aarch64/constraints.md (Utq): New constraint.
* config/aarch64/aarch64-simd.md (load_pair_lanes<mode>): New
define_insn.
* gcc.target/aarch64/load_v2vec_lanes_1.c: New test.
From-SVN: r254549
2017-11-08 19:27:57 +01:00
|
|
|
(define_insn "load_pair_lanes<mode>"
|
|
|
|
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
|
|
|
|
(vec_concat:<VDBL>
|
|
|
|
(match_operand:VDC 1 "memory_operand" "Utq")
|
|
|
|
(match_operand:VDC 2 "memory_operand" "m")))]
|
|
|
|
"TARGET_SIMD && !STRICT_ALIGNMENT
|
|
|
|
&& rtx_equal_p (XEXP (operands[2], 0),
|
|
|
|
plus_constant (Pmode,
|
|
|
|
XEXP (operands[1], 0),
|
|
|
|
GET_MODE_SIZE (<MODE>mode)))"
|
|
|
|
"ldr\\t%q0, %1"
|
|
|
|
[(set_attr "type" "neon_load1_1reg_q")]
|
|
|
|
)
|
|
|
|
|
[AArch64] Add STP pattern to store a vec_concat of two 64-bit registers
On top of the previous vec_merge simplifications [1] we can add this pattern to perform
a store of a vec_concat of two 64-bit values in distinct registers as an STP.
This avoids constructing such a vector explicitly in a register and storing it as
a Q register.
This way for the code in the testcase we can generate:
construct_lane_1:
ldp d1, d0, [x0]
fmov d3, 1.0e+0
fmov d2, 2.0e+0
fadd d4, d1, d3
fadd d5, d0, d2
stp d4, d5, [x1, 32]
ret
construct_lane_2:
ldp x2, x0, [x0]
add x3, x2, 1
add x4, x0, 2
stp x3, x4, [x1, 32]
ret
instead of the current:
construct_lane_1:
ldp d0, d1, [x0]
fmov d3, 1.0e+0
fmov d2, 2.0e+0
fadd d0, d0, d3
fadd d1, d1, d2
dup v0.2d, v0.d[0]
ins v0.d[1], v1.d[0]
str q0, [x1, 32]
ret
construct_lane_2:
ldp x2, x3, [x0]
add x0, x2, 1
add x2, x3, 2
dup v0.2d, x0
ins v0.d[1], x2
str q0, [x1, 32]
ret
Bootstrapped and tested on aarch64-none-linux-gnu.
[1] https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00272.html
https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00273.html
https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00274.html
* config/aarch64/aarch64-simd.md (store_pair_lanes<mode>):
New pattern.
* config/aarch64/constraints.md (Uml): New constraint.
* config/aarch64/predicates.md (aarch64_mem_pair_lanes_operand): New
predicate.
* gcc.target/aarch64/store_v2vec_lanes.c: New test.
From-SVN: r254551
2017-11-08 19:32:09 +01:00
|
|
|
(define_insn "store_pair_lanes<mode>"
|
|
|
|
[(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
|
|
|
|
(vec_concat:<VDBL>
|
|
|
|
(match_operand:VDC 1 "register_operand" "w, r")
|
|
|
|
(match_operand:VDC 2 "register_operand" "w, r")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"@
|
2017-11-29 13:22:06 +01:00
|
|
|
stp\\t%d1, %d2, %y0
|
|
|
|
stp\\t%x1, %x2, %y0"
|
[AArch64] Add STP pattern to store a vec_concat of two 64-bit registers
On top of the previous vec_merge simplifications [1] we can add this pattern to perform
a store of a vec_concat of two 64-bit values in distinct registers as an STP.
This avoids constructing such a vector explicitly in a register and storing it as
a Q register.
This way for the code in the testcase we can generate:
construct_lane_1:
ldp d1, d0, [x0]
fmov d3, 1.0e+0
fmov d2, 2.0e+0
fadd d4, d1, d3
fadd d5, d0, d2
stp d4, d5, [x1, 32]
ret
construct_lane_2:
ldp x2, x0, [x0]
add x3, x2, 1
add x4, x0, 2
stp x3, x4, [x1, 32]
ret
instead of the current:
construct_lane_1:
ldp d0, d1, [x0]
fmov d3, 1.0e+0
fmov d2, 2.0e+0
fadd d0, d0, d3
fadd d1, d1, d2
dup v0.2d, v0.d[0]
ins v0.d[1], v1.d[0]
str q0, [x1, 32]
ret
construct_lane_2:
ldp x2, x3, [x0]
add x0, x2, 1
add x2, x3, 2
dup v0.2d, x0
ins v0.d[1], x2
str q0, [x1, 32]
ret
Bootstrapped and tested on aarch64-none-linux-gnu.
[1] https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00272.html
https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00273.html
https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00274.html
* config/aarch64/aarch64-simd.md (store_pair_lanes<mode>):
New pattern.
* config/aarch64/constraints.md (Uml): New constraint.
* config/aarch64/predicates.md (aarch64_mem_pair_lanes_operand): New
predicate.
* gcc.target/aarch64/store_v2vec_lanes.c: New test.
From-SVN: r254551
2017-11-08 19:32:09 +01:00
|
|
|
[(set_attr "type" "neon_stp, store_16")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
;; In this insn, operand 1 should be low, and operand 2 the high part of the
|
|
|
|
;; dest vector.
|
|
|
|
|
|
|
|
(define_insn "*aarch64_combinez<mode>"
|
2015-10-02 10:32:12 +02:00
|
|
|
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
|
Simplify vec_merge of vec_duplicate with const_vector
I'm trying to improve some of the RTL-level handling of vector lane operations on aarch64 and that
involves dealing with a lot of vec_merge operations. One simplification that I noticed missing
from simplify-rtx are combinations of vec_merge with vec_duplicate.
In this particular case:
(vec_merge (vec_duplicate (X)) (const_vector [A, B]) (const_int N))
which can be replaced with
(vec_concat (X) (B)) if N == 1 (0b01) or
(vec_concat (A) (X)) if N == 2 (0b10).
For the aarch64 testcase in this patch this simplifications allows us to try to combine:
(set (reg:V2DI 77 [ x ])
(vec_concat:V2DI (mem:DI (reg:DI 0 x0 [ y ]) [1 *y_3(D)+0 S8 A64])
(const_int 0 [0])))
instead of the more complex:
(set (reg:V2DI 77 [ x ])
(vec_merge:V2DI (vec_duplicate:V2DI (mem:DI (reg:DI 0 x0 [ y ]) [1 *y_3(D)+0 S8 A64]))
(const_vector:V2DI [
(const_int 0 [0])
(const_int 0 [0])
])
(const_int 1 [0x1])))
For the simplified form above we already have an aarch64 pattern: *aarch64_combinez<mode> which
is missing a DI/DFmode version due to an oversight, so this patch extends that pattern as well to
use the VDC mode iterator that includes DI and DFmode (as well as V2HF which VD_BHSI was missing).
The aarch64 hunk is needed to see the benefit of the simplify-rtx.c hunk, so I didn't split them
into separate patches.
Before this for the testcase we'd generate:
construct_lanedi:
movi v0.4s, 0
ldr x0, [x0]
ins v0.d[0], x0
ret
construct_lanedf:
movi v0.2d, 0
ldr d1, [x0]
ins v0.d[0], v1.d[0]
ret
but now we can generate:
construct_lanedi:
ldr d0, [x0]
ret
construct_lanedf:
ldr d0, [x0]
ret
Bootstrapped and tested on aarch64-none-linux-gnu.
* simplify-rtx.c (simplify_ternary_operation, VEC_MERGE):
Simplify vec_merge of vec_duplicate and const_vector.
* config/aarch64/predicates.md (aarch64_simd_or_scalar_imm_zero):
New predicate.
* config/aarch64/aarch64-simd.md (*aarch64_combinez<mode>): Use VDC
mode iterator. Update predicate on operand 1 to
handle non-const_vec constants. Delete constraints.
(*aarch64_combinez_be<mode>): Likewise for operand 2.
* gcc.target/aarch64/construct_lane_zero_1.c: New test.
From-SVN: r254548
2017-11-08 19:23:35 +01:00
|
|
|
(vec_concat:<VDBL>
|
|
|
|
(match_operand:VDC 1 "general_operand" "w,?r,m")
|
|
|
|
(match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
|
2014-07-04 17:56:27 +02:00
|
|
|
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
|
2015-10-02 10:32:12 +02:00
|
|
|
"@
|
|
|
|
mov\\t%0.8b, %1.8b
|
|
|
|
fmov\t%d0, %1
|
|
|
|
ldr\\t%d0, %1"
|
|
|
|
[(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
|
|
|
|
(set_attr "simd" "yes,*,yes")
|
|
|
|
(set_attr "fp" "*,yes,*")]
|
2014-07-04 17:56:27 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_combinez_be<mode>"
|
2015-10-02 10:32:12 +02:00
|
|
|
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
|
2014-07-04 17:56:27 +02:00
|
|
|
(vec_concat:<VDBL>
|
Simplify vec_merge of vec_duplicate with const_vector
I'm trying to improve some of the RTL-level handling of vector lane operations on aarch64 and that
involves dealing with a lot of vec_merge operations. One simplification that I noticed missing
from simplify-rtx are combinations of vec_merge with vec_duplicate.
In this particular case:
(vec_merge (vec_duplicate (X)) (const_vector [A, B]) (const_int N))
which can be replaced with
(vec_concat (X) (B)) if N == 1 (0b01) or
(vec_concat (A) (X)) if N == 2 (0b10).
For the aarch64 testcase in this patch this simplifications allows us to try to combine:
(set (reg:V2DI 77 [ x ])
(vec_concat:V2DI (mem:DI (reg:DI 0 x0 [ y ]) [1 *y_3(D)+0 S8 A64])
(const_int 0 [0])))
instead of the more complex:
(set (reg:V2DI 77 [ x ])
(vec_merge:V2DI (vec_duplicate:V2DI (mem:DI (reg:DI 0 x0 [ y ]) [1 *y_3(D)+0 S8 A64]))
(const_vector:V2DI [
(const_int 0 [0])
(const_int 0 [0])
])
(const_int 1 [0x1])))
For the simplified form above we already have an aarch64 pattern: *aarch64_combinez<mode> which
is missing a DI/DFmode version due to an oversight, so this patch extends that pattern as well to
use the VDC mode iterator that includes DI and DFmode (as well as V2HF which VD_BHSI was missing).
The aarch64 hunk is needed to see the benefit of the simplify-rtx.c hunk, so I didn't split them
into separate patches.
Before this for the testcase we'd generate:
construct_lanedi:
movi v0.4s, 0
ldr x0, [x0]
ins v0.d[0], x0
ret
construct_lanedf:
movi v0.2d, 0
ldr d1, [x0]
ins v0.d[0], v1.d[0]
ret
but now we can generate:
construct_lanedi:
ldr d0, [x0]
ret
construct_lanedf:
ldr d0, [x0]
ret
Bootstrapped and tested on aarch64-none-linux-gnu.
* simplify-rtx.c (simplify_ternary_operation, VEC_MERGE):
Simplify vec_merge of vec_duplicate and const_vector.
* config/aarch64/predicates.md (aarch64_simd_or_scalar_imm_zero):
New predicate.
* config/aarch64/aarch64-simd.md (*aarch64_combinez<mode>): Use VDC
mode iterator. Update predicate on operand 1 to
handle non-const_vec constants. Delete constraints.
(*aarch64_combinez_be<mode>): Likewise for operand 2.
* gcc.target/aarch64/construct_lane_zero_1.c: New test.
From-SVN: r254548
2017-11-08 19:23:35 +01:00
|
|
|
(match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
|
|
|
|
(match_operand:VDC 1 "general_operand" "w,?r,m")))]
|
2014-07-04 17:56:27 +02:00
|
|
|
"TARGET_SIMD && BYTES_BIG_ENDIAN"
|
2015-10-02 10:32:12 +02:00
|
|
|
"@
|
|
|
|
mov\\t%0.8b, %1.8b
|
|
|
|
fmov\t%d0, %1
|
|
|
|
ldr\\t%d0, %1"
|
|
|
|
[(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
|
|
|
|
(set_attr "simd" "yes,*,yes")
|
|
|
|
(set_attr "fp" "*,yes,*")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2014-07-04 17:56:27 +02:00
|
|
|
(define_expand "aarch64_combine<mode>"
|
|
|
|
[(match_operand:<VDBL> 0 "register_operand")
|
|
|
|
(match_operand:VDC 1 "register_operand")
|
|
|
|
(match_operand:VDC 2 "register_operand")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2017-06-27 19:29:06 +02:00
|
|
|
aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
|
2014-07-04 17:56:27 +02:00
|
|
|
|
2013-06-12 17:34:06 +02:00
|
|
|
DONE;
|
2013-10-15 17:30:00 +02:00
|
|
|
}
|
|
|
|
)
|
2013-06-12 17:34:06 +02:00
|
|
|
|
|
|
|
(define_expand "aarch64_simd_combine<mode>"
|
2014-07-04 17:56:27 +02:00
|
|
|
[(match_operand:<VDBL> 0 "register_operand")
|
|
|
|
(match_operand:VDC 1 "register_operand")
|
|
|
|
(match_operand:VDC 2 "register_operand")]
|
2013-06-12 17:34:06 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
|
|
|
|
emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
|
|
|
|
DONE;
|
2013-10-15 17:30:00 +02:00
|
|
|
}
|
|
|
|
[(set_attr "type" "multiple")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
;; <su><addsub>l<q>.
|
|
|
|
|
2013-10-01 17:08:46 +02:00
|
|
|
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_dup 3)))))]
|
|
|
|
"TARGET_SIMD"
|
2013-10-01 17:08:46 +02:00
|
|
|
"<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<ADDSUB:optab>_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2013-10-01 17:08:46 +02:00
|
|
|
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
|
|
|
|
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_dup 3)))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<ADDSUB:optab>_long")]
|
2013-10-01 17:08:46 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_expand "aarch64_saddl2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2013-10-01 17:08:46 +02:00
|
|
|
emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
|
|
|
|
operands[2], p));
|
2012-10-23 19:02:30 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_uaddl2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2013-10-01 17:08:46 +02:00
|
|
|
emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
|
|
|
|
operands[2], p));
|
2012-10-23 19:02:30 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_ssubl2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2013-10-01 17:08:46 +02:00
|
|
|
emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
|
2012-10-23 19:02:30 +02:00
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_usubl2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2013-10-01 17:08:46 +02:00
|
|
|
emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
|
2012-10-23 19:02:30 +02:00
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VD_BHSI 1 "register_operand" "w"))
|
2012-10-23 19:02:30 +02:00
|
|
|
(ANY_EXTEND:<VWIDE>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VD_BHSI 2 "register_operand" "w"))))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2014-11-21 13:29:26 +01:00
|
|
|
"<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<ADDSUB:optab>_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; <su><addsub>w<q>.
|
|
|
|
|
2015-11-25 07:51:55 +01:00
|
|
|
(define_expand "widen_ssum<mode>3"
|
|
|
|
[(set (match_operand:<VDBLW> 0 "register_operand" "")
|
|
|
|
(plus:<VDBLW> (sign_extend:<VDBLW>
|
|
|
|
(match_operand:VQW 1 "register_operand" ""))
|
|
|
|
(match_operand:<VDBLW> 2 "register_operand" "")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
|
2015-11-25 07:51:55 +01:00
|
|
|
rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
|
|
|
|
|
|
|
|
emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
|
|
|
|
operands[1], p));
|
|
|
|
emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "widen_ssum<mode>3"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "")
|
|
|
|
(plus:<VWIDE> (sign_extend:<VWIDE>
|
|
|
|
(match_operand:VD_BHSI 1 "register_operand" ""))
|
|
|
|
(match_operand:<VWIDE> 2 "register_operand" "")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "widen_usum<mode>3"
|
|
|
|
[(set (match_operand:<VDBLW> 0 "register_operand" "")
|
|
|
|
(plus:<VDBLW> (zero_extend:<VDBLW>
|
|
|
|
(match_operand:VQW 1 "register_operand" ""))
|
|
|
|
(match_operand:<VDBLW> 2 "register_operand" "")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
|
2015-11-25 07:51:55 +01:00
|
|
|
rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
|
|
|
|
|
|
|
|
emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
|
|
|
|
operands[1], p));
|
|
|
|
emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "widen_usum<mode>3"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "")
|
|
|
|
(plus:<VWIDE> (zero_extend:<VWIDE>
|
|
|
|
(match_operand:VD_BHSI 1 "register_operand" ""))
|
|
|
|
(match_operand:<VWIDE> 2 "register_operand" "")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(ANY_EXTEND:<VWIDE>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VD_BHSI 2 "register_operand" "w"))))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<ADDSUB:optab>_widen")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2015-11-25 07:51:55 +01:00
|
|
|
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(ANY_EXTEND:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
|
|
|
|
[(set_attr "type" "neon_<ADDSUB:optab>_widen")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(ANY_EXTEND:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")
|
|
|
|
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<ADDSUB:optab>_widen")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "aarch64_saddw2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_uaddw2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
(define_expand "aarch64_ssubw2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_usubw2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQW 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; <su><r>h<addsub>.
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>h<addsub><mode>"
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_BHSI 2 "register_operand" "w")]
|
2012-10-23 19:02:30 +02:00
|
|
|
HADDSUB))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<addsub>_halve<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; <r><addsub>hn<q>.
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur><addsub>hn<mode>"
|
|
|
|
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
|
|
|
|
(match_operand:VQN 2 "register_operand" "w")]
|
|
|
|
ADDSUBHN))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur><addsub>hn2<mode>"
|
|
|
|
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
|
|
|
|
(match_operand:VQN 2 "register_operand" "w")
|
|
|
|
(match_operand:VQN 3 "register_operand" "w")]
|
|
|
|
ADDSUBHN2))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; pmul.
|
|
|
|
|
|
|
|
(define_insn "aarch64_pmul<mode>"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(unspec:VB [(match_operand:VB 1 "register_operand" "w")
|
|
|
|
(match_operand:VB 2 "register_operand" "w")]
|
|
|
|
UNSPEC_PMUL))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_mul_<Vetype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2015-11-03 11:58:27 +01:00
|
|
|
;; fmulx.
|
|
|
|
|
|
|
|
(define_insn "aarch64_fmulx<mode>"
|
[AArch64][8/10] ARMv8.2-A FP16 two operands scalar intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3):
New.
(<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise.
(add<mode>3): Likewise.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
(<fmaxmin><mode>3): Extend to HF.
* config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_fac<optab><mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn>hfhi3): New.
(<FCVT_FIXED2F:fcvt_fixed_insn>hihf3): Likewise.
* config/aarch64/iterators.md (VHSDF_SDF): Delete.
(VSDQ_HSDI): Support HI.
(fcvt_target, FCVT_TARGET): Likewise.
* config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16,
vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16,
vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32,
vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64,
vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16,
vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16,
vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16,
vrsqrtsh_f16): New.
From-SVN: r238723
2016-07-25 18:10:52 +02:00
|
|
|
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF_HSDF
|
|
|
|
[(match_operand:VHSDF_HSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
UNSPEC_FMULX))]
|
2015-11-03 11:58:27 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_mul_<stype>")]
|
2015-11-03 11:58:27 +01:00
|
|
|
)
|
|
|
|
|
2015-11-22 16:15:20 +01:00
|
|
|
;; vmulxq_lane_f32, and vmulx_laneq_f32
|
|
|
|
|
|
|
|
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
|
|
|
|
[(set (match_operand:VDQSF 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQSF
|
|
|
|
[(match_operand:VDQSF 1 "register_operand" "w")
|
|
|
|
(vec_duplicate:VDQSF
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
|
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
|
|
|
|
UNSPEC_FMULX))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
|
2015-11-22 16:15:20 +01:00
|
|
|
return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
|
|
|
|
|
|
|
|
(define_insn "*aarch64_mulx_elt<mode>"
|
|
|
|
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQF
|
|
|
|
[(match_operand:VDQF 1 "register_operand" "w")
|
|
|
|
(vec_duplicate:VDQF
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:VDQF 2 "register_operand" "w")
|
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
|
|
|
|
UNSPEC_FMULX))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
|
2015-11-22 16:15:20 +01:00
|
|
|
return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_fp_mul_<Vetype><q>")]
|
|
|
|
)
|
|
|
|
|
[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
gcc/
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
(*aarch64_mul3_elt_from_dup<mode>): Likewise.
(*aarch64_fma4_elt_from_dup<mode>): Likewise.
(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
* config/aarch64/iterators.md (VMUL): Support half precision float modes.
(f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
From-SVN: r238719
2016-07-25 16:49:57 +02:00
|
|
|
;; vmulxq_lane
|
2015-11-22 16:15:20 +01:00
|
|
|
|
[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
gcc/
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
(*aarch64_mul3_elt_from_dup<mode>): Likewise.
(*aarch64_fma4_elt_from_dup<mode>): Likewise.
(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
* config/aarch64/iterators.md (VMUL): Support half precision float modes.
(f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
From-SVN: r238719
2016-07-25 16:49:57 +02:00
|
|
|
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
|
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF
|
|
|
|
[(match_operand:VHSDF 1 "register_operand" "w")
|
|
|
|
(vec_duplicate:VHSDF
|
2017-03-16 11:03:11 +01:00
|
|
|
(match_operand:<VEL> 2 "register_operand" "<h_con>"))]
|
2015-11-22 16:15:20 +01:00
|
|
|
UNSPEC_FMULX))]
|
|
|
|
"TARGET_SIMD"
|
[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
gcc/
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
(*aarch64_mul3_elt_from_dup<mode>): Likewise.
(*aarch64_fma4_elt_from_dup<mode>): Likewise.
(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
* config/aarch64/iterators.md (VMUL): Support half precision float modes.
(f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
From-SVN: r238719
2016-07-25 16:49:57 +02:00
|
|
|
"fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
|
|
|
|
[(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
|
2015-11-22 16:15:20 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vmulxs_lane_f32, vmulxs_laneq_f32
|
|
|
|
;; vmulxd_lane_f64 == vmulx_lane_f64
|
|
|
|
;; vmulxd_laneq_f64 == vmulx_laneq_f64
|
|
|
|
|
|
|
|
(define_insn "*aarch64_vgetfmulx<mode>"
|
|
|
|
[(set (match_operand:<VEL> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VEL>
|
|
|
|
[(match_operand:<VEL> 1 "register_operand" "w")
|
|
|
|
(vec_select:<VEL>
|
2017-03-09 11:34:36 +01:00
|
|
|
(match_operand:VDQF 2 "register_operand" "w")
|
2015-11-22 16:15:20 +01:00
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
|
|
|
|
UNSPEC_FMULX))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
|
2015-11-22 16:15:20 +01:00
|
|
|
return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "fmul<Vetype>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
;; <su>q<addsub>
|
|
|
|
|
|
|
|
(define_insn "aarch64_<su_optab><optab><mode>"
|
|
|
|
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
|
|
|
|
(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VSDQ_I 2 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<optab><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; suqadd and usqadd
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>qadd<mode>"
|
|
|
|
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
|
|
|
|
(match_operand:VSDQ_I 2 "register_operand" "w")]
|
|
|
|
USSUQADD))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_qadd<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; sqmovun
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqmovun<mode>"
|
|
|
|
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
|
|
|
|
UNSPEC_SQXTUN))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
|
2015-11-26 14:50:47 +01:00
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
;; sqmovn and uqmovn
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>qmovn<mode>"
|
|
|
|
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
|
|
|
|
SUQMOVN))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
|
2015-11-26 14:50:47 +01:00
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
;; <su>q<absneg>
|
|
|
|
|
|
|
|
(define_insn "aarch64_s<optab><mode>"
|
2014-04-22 17:55:53 +02:00
|
|
|
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
|
|
|
|
(UNQOPS:VSDQ_I
|
|
|
|
(match_operand:VSDQ_I 1 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_<optab><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; sq<r>dmulh.
|
|
|
|
|
|
|
|
(define_insn "aarch64_sq<r>dmulh<mode>"
|
|
|
|
[(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_HSI
|
|
|
|
[(match_operand:VSDQ_HSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VSDQ_HSI 2 "register_operand" "w")]
|
|
|
|
VQDMULH))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; sq<r>dmulh_lane
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
|
2013-01-25 12:35:03 +01:00
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQHS
|
|
|
|
[(match_operand:VDQHS 1 "register_operand" "w")
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
|
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
|
|
|
|
VQDMULH))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"*
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
|
2013-01-25 12:35:03 +01:00
|
|
|
return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
|
2013-01-25 12:35:03 +01:00
|
|
|
)
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
|
2013-01-25 12:35:03 +01:00
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQHS
|
|
|
|
[(match_operand:VDQHS 1 "register_operand" "w")
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
|
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
|
|
|
|
VQDMULH))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"*
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
|
2013-01-25 12:35:03 +01:00
|
|
|
return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
|
2013-01-25 12:35:03 +01:00
|
|
|
)
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
|
2013-01-25 12:35:03 +01:00
|
|
|
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
|
|
|
|
(unspec:SD_HSI
|
|
|
|
[(match_operand:SD_HSI 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(vec_select:<VEL>
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
|
|
|
|
VQDMULH))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"*
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
|
2013-01-25 12:35:03 +01:00
|
|
|
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
|
2014-08-05 12:43:41 +02:00
|
|
|
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
|
|
|
|
(unspec:SD_HSI
|
|
|
|
[(match_operand:SD_HSI 1 "register_operand" "w")
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
|
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
|
|
|
|
VQDMULH))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"*
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
|
2014-08-05 12:43:41 +02:00
|
|
|
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
|
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
|
|
|
|
)
|
|
|
|
|
2015-11-26 14:50:47 +01:00
|
|
|
;; sqrdml[as]h.
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
|
|
|
|
[(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_HSI
|
|
|
|
[(match_operand:VSDQ_HSI 1 "register_operand" "0")
|
|
|
|
(match_operand:VSDQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_operand:VSDQ_HSI 3 "register_operand" "w")]
|
|
|
|
SQRDMLH_AS))]
|
|
|
|
"TARGET_SIMD_RDMA"
|
|
|
|
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
|
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_long")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; sqrdml[as]h_lane.
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
|
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQHS
|
|
|
|
[(match_operand:VDQHS 1 "register_operand" "0")
|
|
|
|
(match_operand:VDQHS 2 "register_operand" "w")
|
|
|
|
(vec_select:<VEL>
|
2016-02-16 16:59:51 +01:00
|
|
|
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
|
2015-11-26 14:50:47 +01:00
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
|
|
|
|
SQRDMLH_AS))]
|
|
|
|
"TARGET_SIMD_RDMA"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
|
2015-11-26 14:50:47 +01:00
|
|
|
return
|
|
|
|
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
|
|
|
|
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
|
|
|
|
(unspec:SD_HSI
|
|
|
|
[(match_operand:SD_HSI 1 "register_operand" "0")
|
|
|
|
(match_operand:SD_HSI 2 "register_operand" "w")
|
|
|
|
(vec_select:<VEL>
|
2016-02-16 16:59:51 +01:00
|
|
|
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
|
2015-11-26 14:50:47 +01:00
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
|
|
|
|
SQRDMLH_AS))]
|
|
|
|
"TARGET_SIMD_RDMA"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
|
2015-11-26 14:50:47 +01:00
|
|
|
return
|
|
|
|
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; sqrdml[as]h_laneq.
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
|
|
|
|
[(set (match_operand:VDQHS 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQHS
|
|
|
|
[(match_operand:VDQHS 1 "register_operand" "0")
|
|
|
|
(match_operand:VDQHS 2 "register_operand" "w")
|
|
|
|
(vec_select:<VEL>
|
2016-02-16 16:59:51 +01:00
|
|
|
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
|
2015-11-26 14:50:47 +01:00
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
|
|
|
|
SQRDMLH_AS))]
|
|
|
|
"TARGET_SIMD_RDMA"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
|
2015-11-26 14:50:47 +01:00
|
|
|
return
|
|
|
|
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
|
|
|
|
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
|
|
|
|
(unspec:SD_HSI
|
|
|
|
[(match_operand:SD_HSI 1 "register_operand" "0")
|
|
|
|
(match_operand:SD_HSI 2 "register_operand" "w")
|
|
|
|
(vec_select:<VEL>
|
2016-02-16 16:59:51 +01:00
|
|
|
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
|
2015-11-26 14:50:47 +01:00
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
|
|
|
|
SQRDMLH_AS))]
|
|
|
|
"TARGET_SIMD_RDMA"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
|
2015-11-26 14:50:47 +01:00
|
|
|
return
|
|
|
|
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
;; vqdml[sa]l
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VSD_HSI 2 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VSD_HSI 3 "register_operand" "w")))
|
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vqdml[sa]l_lane
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VD_HSI 2 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:VD_HSI
|
|
|
|
(vec_select:<VEL>
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
|
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
|
|
|
|
))
|
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
|
2014-06-20 10:51:34 +02:00
|
|
|
return
|
|
|
|
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
|
2014-06-20 10:51:34 +02:00
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VD_HSI 2 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:VD_HSI
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
|
|
|
|
))
|
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return
|
|
|
|
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:SD_HSI 2 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VEL>
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
|
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
|
|
|
|
)
|
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
|
2014-06-20 10:51:34 +02:00
|
|
|
return
|
|
|
|
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
|
2014-06-20 10:51:34 +02:00
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:SD_HSI 2 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
|
|
|
|
)
|
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return
|
|
|
|
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vqdml[sa]l_n
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VD_HSI 2 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:VD_HSI
|
2013-09-06 13:02:52 +02:00
|
|
|
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
|
2012-10-23 19:02:30 +02:00
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; sqdml[as]l2
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 3 "register_operand" "w")
|
|
|
|
(match_dup 4))))
|
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmlal2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 3 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], operands[3], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmlsl2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 3 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], operands[3], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; vqdml[sa]l2_lane
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:<VHALF>
|
|
|
|
(vec_select:<VEL>
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
|
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
|
|
|
|
))))
|
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
|
2014-06-20 10:51:34 +02:00
|
|
|
return
|
|
|
|
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:<VHALF>
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
|
|
|
|
))))
|
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return
|
|
|
|
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmlal2_lane<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 4 "immediate_operand" "i")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], operands[3],
|
|
|
|
operands[4], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmlal2_laneq<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 4 "immediate_operand" "i")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2014-06-20 10:51:34 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
|
2012-10-23 19:02:30 +02:00
|
|
|
operands[2], operands[3],
|
|
|
|
operands[4], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmlsl2_lane<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 4 "immediate_operand" "i")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], operands[3],
|
|
|
|
operands[4], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmlsl2_laneq<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 4 "immediate_operand" "i")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2014-06-20 10:51:34 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
|
2012-10-23 19:02:30 +02:00
|
|
|
operands[2], operands[3],
|
|
|
|
operands[4], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(SBINQOPS:<VWIDE>
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "0")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:<VHALF>
|
2013-09-06 13:02:52 +02:00
|
|
|
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
|
2012-10-23 19:02:30 +02:00
|
|
|
(const_int 1))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmlal2_n<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_operand:<VEL> 3 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], operands[3],
|
|
|
|
p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmlsl2_n<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:<VWIDE> 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_operand:<VEL> 3 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], operands[3],
|
|
|
|
p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; vqdmull
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdmull<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VSD_HSI 1 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VSD_HSI 2 "register_operand" "w")))
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vqdmull_lane
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sqdmull_lane<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VD_HSI 1 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:VD_HSI
|
|
|
|
(vec_select:<VEL>
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
|
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
|
|
|
|
))
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
|
2014-06-20 10:51:34 +02:00
|
|
|
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sqdmull_laneq<mode>"
|
2014-06-20 10:51:34 +02:00
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VD_HSI 1 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:VD_HSI
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
|
|
|
|
))
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sqdmull_lane<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:SD_HSI 1 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VEL>
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
|
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))
|
|
|
|
))
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
|
2014-06-20 10:51:34 +02:00
|
|
|
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
Add bounds checking to vqdm*_lane intrinsics via a qualifier that also flips endianness
gcc/:
* config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add
qualifier_lane_index.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): Rename to...
(aarch64_types_quadop_lane_qualifiers, TYPES_QUADOP_LANE): ...these.
(aarch64_types_ternop_lane_qualifiers, TYPES_TERNOP_LANE): New.
(aarch64_types_getlane_qualifiers): Rename to...
(aarch64_types_binop_imm_qualifiers): ...this.
(TYPES_SHIFTIMM): Follow renaming.
(TYPES_GETLANE): Rename to...
(TYPE_GETREG): ...this.
(aarch64_types_setlane_qualifiers): Rename to...
(aarch64_type_ternop_imm_qualifiers): ...this.
(TYPES_SHIFTINSERT, TYPES_SHIFTACC): Follow renaming.
(TYPES_SETLANE): Follow renaming above, and rename self to...
(TYPE_SETREG): ...this.
(enum builtin_simd_arg): Add SIMD_ARG_LANE_INDEX.
(aarch64_simd_expand_args): Add range check and endianness-flip.
(aarch64_simd_expand_builtin): Add mapping for qualifier_lane_index.
* config/aarch64/aarch64-simd.md
(aarch64_sq<r>dmulh_lane<mode>_internal *2): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this, and remove lane bounds check.
(aarch64_sqdmulh_lane<mode> *2, aarch64_sqrdmulh_lane<mode> *2): Delete.
(aarch64_sq<r>dmulh_laneq<mode>_internal): Rename to...
(aarch64_sq<r>dmulh_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): ...this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal *2): Rename to...
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): ...this.
(aarch64_sqdmull_lane<mode>_internal *2): Rename to...
(aarch64_sqdmull_lane<mode>): ...this.
(aarch64_sqdmull_laneq<mode>_internal *2): Rename to...
(aarch64_sqdmull_laneq<mode>): ...this.
(aarch64_sqdmulh_laneq<mode>, aarch64_sqrdmulh_laneq<mode>,
(aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Delete.
(aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Remove
bounds check and lane flip.
* config/aarch64/aarch64-simd-builtins.def (be_checked_get_lane,
get_dregoi, get_dregci, getdregxi, get_qregoi,get_qregci, get_qregxi,
set_qregoi, set_qregci, set_qregxi): Change qualifiers to GETREG.
(sqdmlal_lane, sqdmlsl_lane, sqdmlal_laneq, sqdmlsl_laneq,
sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq): Follow
renaming of TERNOP_LANE to QUADOP_LANE.
(sqdmull_lane, sqdmull_laneq, sqdmull2_lane, sqdmull2_laneq,
sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq): Set
qualifiers to TERNOP_LANE.
gcc/testsuite/:
* gcc.target/aarch64/simd/vqdmlal_high_lane_s16_indices_1.c: New test.
* gcc.target/aarch64/simd/vqdmlal_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlal_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsl_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_high_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmull_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulh_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_lane_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s16_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhq_laneq_s32_indices_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_lane_s32_indices_1.c: Likewise.
From-SVN: r217440
2014-11-12 19:51:53 +01:00
|
|
|
(define_insn "aarch64_sqdmull_laneq<mode>"
|
2014-06-20 10:51:34 +02:00
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:SD_HSI 1 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))
|
|
|
|
))
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vqdmull_n
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdmull_n<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(match_operand:VD_HSI 1 "register_operand" "w"))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:VD_HSI
|
2013-09-06 13:02:52 +02:00
|
|
|
(match_operand:<VEL> 2 "register_operand" "<vwx>")))
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vqdmull2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdmull2<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")
|
|
|
|
(match_dup 3)))
|
|
|
|
)
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmull2<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:VQ_HSI 1 "register_operand" "w")
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:VQ_HSI 2 "register_operand" "w")]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; vqdmull2_lane
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:<VHALF>
|
|
|
|
(vec_select:<VEL>
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
|
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
|
|
|
|
))
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
|
2014-06-20 10:51:34 +02:00
|
|
|
return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:<VHALF>
|
|
|
|
(vec_select:<VEL>
|
|
|
|
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
|
|
|
|
))
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
2014-01-23 15:56:50 +01:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
|
2014-01-23 15:56:50 +01:00
|
|
|
return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
|
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmull2_lane<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:VQ_HSI 1 "register_operand" "w")
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 3 "immediate_operand" "i")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], operands[3],
|
|
|
|
p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmull2_laneq<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:VQ_HSI 1 "register_operand" "w")
|
2014-06-20 10:51:34 +02:00
|
|
|
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
|
2012-10-23 19:02:30 +02:00
|
|
|
(match_operand:SI 3 "immediate_operand" "i")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2014-06-20 10:51:34 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
|
2012-10-23 19:02:30 +02:00
|
|
|
operands[2], operands[3],
|
|
|
|
p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; vqdmull2_n
|
|
|
|
|
|
|
|
(define_insn "aarch64_sqdmull2_n<mode>_internal"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(ss_ashift:<VWIDE>
|
|
|
|
(mult:<VWIDE>
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_select:<VHALF>
|
|
|
|
(match_operand:VQ_HSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
|
|
|
|
(sign_extend:<VWIDE>
|
|
|
|
(vec_duplicate:<VHALF>
|
2013-09-06 13:02:52 +02:00
|
|
|
(match_operand:<VEL> 2 "register_operand" "<vwx>")))
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
(const_int 1)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_expand "aarch64_sqdmull2_n<mode>"
|
|
|
|
[(match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(match_operand:VQ_HSI 1 "register_operand" "w")
|
|
|
|
(match_operand:<VEL> 2 "register_operand" "w")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
This patch passes the number of units to aarch64_simd_vect_par_cnst_half,
which avoids a to_constant () once GET_MODE_NUNITS is variable.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_simd_vect_par_cnst_half):
Take the number of units too.
* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Likewise.
(aarch64_simd_check_vect_par_cnst_half): Update call accordingly,
but check for a vector mode before rather than after the call.
* config/aarch64/aarch64-simd.md (aarch64_split_simd_mov<mode>)
(move_hi_quad_<mode>, vec_unpack<su>_hi_<mode>)
(vec_unpack<su>_lo_<mode, vec_widen_<su>mult_lo_<mode>)
(vec_widen_<su>mult_hi_<mode>, vec_unpacks_lo_<mode>)
(vec_unpacks_hi_<mode>, aarch64_saddl2<mode>, aarch64_uaddl2<mode>)
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>, widen_ssum<mode>3)
(widen_usum<mode>3, aarch64_saddw2<mode>, aarch64_uaddw2<mode>)
(aarch64_ssubw2<mode>, aarch64_usubw2<mode>, aarch64_sqdmlal2<mode>)
(aarch64_sqdmlsl2<mode>, aarch64_sqdmlal2_lane<mode>)
(aarch64_sqdmlal2_laneq<mode>, aarch64_sqdmlsl2_lane<mode>)
(aarch64_sqdmlsl2_laneq<mode>, aarch64_sqdmlal2_n<mode>)
(aarch64_sqdmlsl2_n<mode>, aarch64_sqdmull2<mode>)
(aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>)
(aarch64_sqdmull2_n<mode>): Update accordingly.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254468
2017-11-06 21:02:35 +01:00
|
|
|
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
2012-10-23 19:02:30 +02:00
|
|
|
emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
|
|
|
|
operands[2], p));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; vshl
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>shl<mode>"
|
|
|
|
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_I_DI
|
|
|
|
[(match_operand:VSDQ_I_DI 1 "register_operand" "w")
|
|
|
|
(match_operand:VSDQ_I_DI 2 "register_operand" "w")]
|
|
|
|
VSHL))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_reg<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
;; vqshl
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>q<r>shl<mode>"
|
|
|
|
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_I
|
|
|
|
[(match_operand:VSDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VSDQ_I 2 "register_operand" "w")]
|
|
|
|
VQSHL))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_shift_reg<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vshll_n
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>shll_n<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
|
2014-09-25 18:54:38 +02:00
|
|
|
(match_operand:SI 2
|
|
|
|
"aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
|
2012-10-23 19:02:30 +02:00
|
|
|
VSHLL))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2016-06-15 18:07:34 +02:00
|
|
|
if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
|
|
|
|
return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
|
|
|
|
else
|
|
|
|
return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
|
2012-10-23 19:02:30 +02:00
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_imm_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vshll_high_n
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>shll2_n<mode>"
|
|
|
|
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
|
|
|
|
(match_operand:SI 2 "immediate_operand" "i")]
|
|
|
|
VSHLL))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2016-06-15 18:07:34 +02:00
|
|
|
if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
|
|
|
|
return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
|
|
|
|
else
|
|
|
|
return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
|
2012-10-23 19:02:30 +02:00
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_imm_long")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vrshr_n
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>shr_n<mode>"
|
|
|
|
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
|
2014-09-25 18:54:38 +02:00
|
|
|
(match_operand:SI 2
|
|
|
|
"aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
|
2012-10-23 19:02:30 +02:00
|
|
|
VRSHR_N))]
|
|
|
|
"TARGET_SIMD"
|
2014-09-25 18:54:38 +02:00
|
|
|
"<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_shift_imm<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; v(r)sra_n
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>sra_n<mode>"
|
|
|
|
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
|
|
|
|
(match_operand:VSDQ_I_DI 2 "register_operand" "w")
|
2014-09-25 18:54:38 +02:00
|
|
|
(match_operand:SI 3
|
|
|
|
"aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
|
2012-10-23 19:02:30 +02:00
|
|
|
VSRA))]
|
|
|
|
"TARGET_SIMD"
|
2014-09-25 18:54:38 +02:00
|
|
|
"<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_acc<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vs<lr>i_n
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
|
|
|
|
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
|
|
|
|
(match_operand:VSDQ_I_DI 2 "register_operand" "w")
|
2014-09-25 18:54:38 +02:00
|
|
|
(match_operand:SI 3
|
|
|
|
"aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
|
2012-10-23 19:02:30 +02:00
|
|
|
VSLRI))]
|
|
|
|
"TARGET_SIMD"
|
2014-09-25 18:54:38 +02:00
|
|
|
"s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_shift_imm<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; vqshl(u)
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
|
|
|
|
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
|
|
|
|
(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
|
2014-09-25 18:54:38 +02:00
|
|
|
(match_operand:SI 2
|
|
|
|
"aarch64_simd_shift_imm_<ve_mode>" "i")]
|
2012-10-23 19:02:30 +02:00
|
|
|
VQSHL_N))]
|
|
|
|
"TARGET_SIMD"
|
2014-09-25 18:54:38 +02:00
|
|
|
"<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_shift_imm<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
;; vq(r)shr(u)n_n
|
|
|
|
|
|
|
|
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
|
|
|
|
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
|
|
|
(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
|
2014-09-25 18:54:38 +02:00
|
|
|
(match_operand:SI 2
|
|
|
|
"aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
|
2012-10-23 19:02:30 +02:00
|
|
|
VQSHRN_N))]
|
|
|
|
"TARGET_SIMD"
|
2014-09-25 18:54:38 +02:00
|
|
|
"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
|
2013-05-01 12:33:57 +02:00
|
|
|
;; cm(eq|ge|gt|lt|le)
|
|
|
|
;; Note, we have constraints for Dz and Z as different expanders
|
|
|
|
;; have different ideas of what should be passed to this pattern.
|
2012-10-23 19:02:30 +02:00
|
|
|
|
2013-05-01 12:33:57 +02:00
|
|
|
(define_insn "aarch64_cm<optab><mode>"
|
2017-08-31 11:52:38 +02:00
|
|
|
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
|
|
|
|
(neg:<V_INT_EQUIV>
|
|
|
|
(COMPARISONS:<V_INT_EQUIV>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VDQ_I 1 "register_operand" "w,w")
|
|
|
|
(match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
|
2013-05-01 12:33:57 +02:00
|
|
|
)))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"@
|
2013-05-01 12:33:57 +02:00
|
|
|
cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
|
|
|
|
cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2013-05-01 12:33:57 +02:00
|
|
|
(define_insn_and_split "aarch64_cm<optab>di"
|
|
|
|
[(set (match_operand:DI 0 "register_operand" "=w,w,r")
|
|
|
|
(neg:DI
|
|
|
|
(COMPARISONS:DI
|
|
|
|
(match_operand:DI 1 "register_operand" "w,w,r")
|
|
|
|
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
|
2013-05-23 12:18:19 +02:00
|
|
|
)))
|
|
|
|
(clobber (reg:CC CC_REGNUM))]
|
2013-05-01 12:33:57 +02:00
|
|
|
"TARGET_SIMD"
|
2014-04-22 12:49:48 +02:00
|
|
|
"#"
|
2017-12-05 15:40:37 +01:00
|
|
|
"&& reload_completed"
|
2014-04-22 12:49:48 +02:00
|
|
|
[(set (match_operand:DI 0 "register_operand")
|
|
|
|
(neg:DI
|
|
|
|
(COMPARISONS:DI
|
|
|
|
(match_operand:DI 1 "register_operand")
|
|
|
|
(match_operand:DI 2 "aarch64_simd_reg_or_zero")
|
|
|
|
)))]
|
2013-05-01 12:33:57 +02:00
|
|
|
{
|
2014-04-22 12:49:48 +02:00
|
|
|
/* If we are in the general purpose register file,
|
|
|
|
we split to a sequence of comparison and store. */
|
|
|
|
if (GP_REGNUM_P (REGNO (operands[0]))
|
|
|
|
&& GP_REGNUM_P (REGNO (operands[1])))
|
|
|
|
{
|
decl.c, [...]: Remove redundant enum from machine_mode.
gcc/ada/
* gcc-interface/decl.c, gcc-interface/gigi.h, gcc-interface/misc.c,
gcc-interface/trans.c, gcc-interface/utils.c, gcc-interface/utils2.c:
Remove redundant enum from machine_mode.
gcc/c-family/
* c-common.c, c-common.h, c-cppbuiltin.c, c-lex.c: Remove redundant
enum from machine_mode.
gcc/c/
* c-decl.c, c-tree.h, c-typeck.c: Remove redundant enum from
machine_mode.
gcc/cp/
* constexpr.c: Remove redundant enum from machine_mode.
gcc/fortran/
* trans-types.c, trans-types.h: Remove redundant enum from
machine_mode.
gcc/go/
* go-lang.c: Remove redundant enum from machine_mode.
gcc/java/
* builtins.c, java-tree.h, typeck.c: Remove redundant enum from
machine_mode.
gcc/lto/
* lto-lang.c: Remove redundant enum from machine_mode.
gcc/
* addresses.h, alias.c, asan.c, auto-inc-dec.c, bt-load.c, builtins.c,
builtins.h, caller-save.c, calls.c, calls.h, cfgexpand.c, cfgloop.h,
cfgrtl.c, combine.c, compare-elim.c, config/aarch64/aarch64-builtins.c,
config/aarch64/aarch64-protos.h, config/aarch64/aarch64-simd.md,
config/aarch64/aarch64.c, config/aarch64/aarch64.h,
config/aarch64/aarch64.md, config/alpha/alpha-protos.h,
config/alpha/alpha.c, config/arc/arc-protos.h, config/arc/arc.c,
config/arc/arc.h, config/arc/predicates.md,
config/arm/aarch-common-protos.h, config/arm/aarch-common.c,
config/arm/arm-protos.h, config/arm/arm.c, config/arm/arm.h,
config/arm/arm.md, config/arm/neon.md, config/arm/thumb2.md,
config/avr/avr-log.c, config/avr/avr-protos.h, config/avr/avr.c,
config/avr/avr.md, config/bfin/bfin-protos.h, config/bfin/bfin.c,
config/c6x/c6x-protos.h, config/c6x/c6x.c, config/c6x/c6x.md,
config/cr16/cr16-protos.h, config/cr16/cr16.c,
config/cris/cris-protos.h, config/cris/cris.c, config/cris/cris.md,
config/darwin-protos.h, config/darwin.c,
config/epiphany/epiphany-protos.h, config/epiphany/epiphany.c,
config/epiphany/epiphany.md, config/fr30/fr30.c,
config/frv/frv-protos.h, config/frv/frv.c, config/frv/predicates.md,
config/h8300/h8300-protos.h, config/h8300/h8300.c,
config/i386/i386-builtin-types.awk, config/i386/i386-protos.h,
config/i386/i386.c, config/i386/i386.md, config/i386/predicates.md,
config/i386/sse.md, config/i386/sync.md, config/ia64/ia64-protos.h,
config/ia64/ia64.c, config/iq2000/iq2000-protos.h,
config/iq2000/iq2000.c, config/iq2000/iq2000.md,
config/lm32/lm32-protos.h, config/lm32/lm32.c,
config/m32c/m32c-protos.h, config/m32c/m32c.c,
config/m32r/m32r-protos.h, config/m32r/m32r.c,
config/m68k/m68k-protos.h, config/m68k/m68k.c,
config/mcore/mcore-protos.h, config/mcore/mcore.c,
config/mcore/mcore.md, config/mep/mep-protos.h, config/mep/mep.c,
config/microblaze/microblaze-protos.h, config/microblaze/microblaze.c,
config/mips/mips-protos.h, config/mips/mips.c,
config/mmix/mmix-protos.h, config/mmix/mmix.c,
config/mn10300/mn10300-protos.h, config/mn10300/mn10300.c,
config/moxie/moxie.c, config/msp430/msp430-protos.h,
config/msp430/msp430.c, config/nds32/nds32-cost.c,
config/nds32/nds32-intrinsic.c, config/nds32/nds32-md-auxiliary.c,
config/nds32/nds32-protos.h, config/nds32/nds32.c,
config/nios2/nios2-protos.h, config/nios2/nios2.c,
config/pa/pa-protos.h, config/pa/pa.c, config/pdp11/pdp11-protos.h,
config/pdp11/pdp11.c, config/rl78/rl78-protos.h, config/rl78/rl78.c,
config/rs6000/altivec.md, config/rs6000/rs6000-c.c,
config/rs6000/rs6000-protos.h, config/rs6000/rs6000.c,
config/rs6000/rs6000.h, config/rx/rx-protos.h, config/rx/rx.c,
config/s390/predicates.md, config/s390/s390-protos.h,
config/s390/s390.c, config/s390/s390.h, config/s390/s390.md,
config/sh/predicates.md, config/sh/sh-protos.h, config/sh/sh.c,
config/sh/sh.md, config/sparc/predicates.md,
config/sparc/sparc-protos.h, config/sparc/sparc.c,
config/sparc/sparc.md, config/spu/spu-protos.h, config/spu/spu.c,
config/stormy16/stormy16-protos.h, config/stormy16/stormy16.c,
config/tilegx/tilegx-protos.h, config/tilegx/tilegx.c,
config/tilegx/tilegx.md, config/tilepro/tilepro-protos.h,
config/tilepro/tilepro.c, config/v850/v850-protos.h,
config/v850/v850.c, config/v850/v850.md, config/vax/vax-protos.h,
config/vax/vax.c, config/vms/vms-c.c, config/xtensa/xtensa-protos.h,
config/xtensa/xtensa.c, coverage.c, cprop.c, cse.c, cselib.c, cselib.h,
dbxout.c, ddg.c, df-problems.c, dfp.c, dfp.h, doc/md.texi,
doc/rtl.texi, doc/tm.texi, doc/tm.texi.in, dojump.c, dse.c,
dwarf2cfi.c, dwarf2out.c, dwarf2out.h, emit-rtl.c, emit-rtl.h,
except.c, explow.c, expmed.c, expmed.h, expr.c, expr.h, final.c,
fixed-value.c, fixed-value.h, fold-const.c, function.c, function.h,
fwprop.c, gcse.c, gengenrtl.c, genmodes.c, genopinit.c, genoutput.c,
genpreds.c, genrecog.c, gensupport.c, gimple-ssa-strength-reduction.c,
graphite-clast-to-gimple.c, haifa-sched.c, hooks.c, hooks.h, ifcvt.c,
internal-fn.c, ira-build.c, ira-color.c, ira-conflicts.c, ira-costs.c,
ira-emit.c, ira-int.h, ira-lives.c, ira.c, ira.h, jump.c, langhooks.h,
libfuncs.h, lists.c, loop-doloop.c, loop-invariant.c, loop-iv.c,
loop-unroll.c, lower-subreg.c, lower-subreg.h, lra-assigns.c,
lra-constraints.c, lra-eliminations.c, lra-int.h, lra-lives.c,
lra-spills.c, lra.c, lra.h, machmode.h, omp-low.c, optabs.c, optabs.h,
output.h, postreload.c, print-tree.c, read-rtl.c, real.c, real.h,
recog.c, recog.h, ree.c, reg-stack.c, regcprop.c, reginfo.c,
regrename.c, regs.h, reload.c, reload.h, reload1.c, rtl.c, rtl.h,
rtlanal.c, rtlhash.c, rtlhooks-def.h, rtlhooks.c, sched-deps.c,
sel-sched-dump.c, sel-sched-ir.c, sel-sched-ir.h, sel-sched.c,
simplify-rtx.c, stmt.c, stor-layout.c, stor-layout.h, target.def,
targhooks.c, targhooks.h, tree-affine.c, tree-call-cdce.c,
tree-complex.c, tree-data-ref.c, tree-dfa.c, tree-if-conv.c,
tree-inline.c, tree-outof-ssa.c, tree-scalar-evolution.c,
tree-ssa-address.c, tree-ssa-ccp.c, tree-ssa-loop-ivopts.c,
tree-ssa-loop-ivopts.h, tree-ssa-loop-manip.c,
tree-ssa-loop-prefetch.c, tree-ssa-math-opts.c, tree-ssa-reassoc.c,
tree-ssa-sccvn.c, tree-streamer-in.c, tree-switch-conversion.c,
tree-vect-data-refs.c, tree-vect-generic.c, tree-vect-loop.c,
tree-vect-patterns.c, tree-vect-slp.c, tree-vect-stmts.c,
tree-vrp.c, tree.c, tree.h, tsan.c, ubsan.c, valtrack.c,
var-tracking.c, varasm.c: Remove redundant enum from
machine_mode.
gcc/
* gengtype.c (main): Treat machine_mode as a scalar typedef.
* genmodes.c (emit_insn_modes_h): Hide inline functions if
USED_FOR_TARGET.
From-SVN: r216834
2014-10-29 13:02:45 +01:00
|
|
|
machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
|
2014-04-22 12:49:48 +02:00
|
|
|
rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
|
|
|
|
rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
|
|
|
|
emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
/* Otherwise, we expand to a similar pattern which does not
|
|
|
|
clobber CC_REGNUM. */
|
2013-05-01 12:33:57 +02:00
|
|
|
}
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
|
2013-05-01 12:33:57 +02:00
|
|
|
)
|
|
|
|
|
2014-04-22 12:49:48 +02:00
|
|
|
(define_insn "*aarch64_cm<optab>di"
|
|
|
|
[(set (match_operand:DI 0 "register_operand" "=w,w")
|
|
|
|
(neg:DI
|
|
|
|
(COMPARISONS:DI
|
|
|
|
(match_operand:DI 1 "register_operand" "w,w")
|
|
|
|
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
|
|
|
|
)))]
|
|
|
|
"TARGET_SIMD && reload_completed"
|
|
|
|
"@
|
|
|
|
cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
|
|
|
|
cm<optab>\t%d0, %d1, #0"
|
|
|
|
[(set_attr "type" "neon_compare, neon_compare_zero")]
|
|
|
|
)
|
|
|
|
|
2013-05-01 12:33:57 +02:00
|
|
|
;; cm(hs|hi)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
2013-05-01 12:33:57 +02:00
|
|
|
(define_insn "aarch64_cm<optab><mode>"
|
2017-08-31 11:52:38 +02:00
|
|
|
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
|
|
|
|
(neg:<V_INT_EQUIV>
|
|
|
|
(UCOMPARISONS:<V_INT_EQUIV>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w")
|
2013-05-01 12:33:57 +02:00
|
|
|
)))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
2013-05-01 12:33:57 +02:00
|
|
|
"cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_compare<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
2013-05-01 12:33:57 +02:00
|
|
|
(define_insn_and_split "aarch64_cm<optab>di"
|
|
|
|
[(set (match_operand:DI 0 "register_operand" "=w,r")
|
|
|
|
(neg:DI
|
|
|
|
(UCOMPARISONS:DI
|
|
|
|
(match_operand:DI 1 "register_operand" "w,r")
|
|
|
|
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
|
2013-05-23 12:18:19 +02:00
|
|
|
)))
|
|
|
|
(clobber (reg:CC CC_REGNUM))]
|
2013-05-01 12:33:57 +02:00
|
|
|
"TARGET_SIMD"
|
2014-04-22 12:49:48 +02:00
|
|
|
"#"
|
2017-12-05 15:40:37 +01:00
|
|
|
"&& reload_completed"
|
2014-04-22 12:49:48 +02:00
|
|
|
[(set (match_operand:DI 0 "register_operand")
|
|
|
|
(neg:DI
|
|
|
|
(UCOMPARISONS:DI
|
|
|
|
(match_operand:DI 1 "register_operand")
|
|
|
|
(match_operand:DI 2 "aarch64_simd_reg_or_zero")
|
|
|
|
)))]
|
2013-05-01 12:33:57 +02:00
|
|
|
{
|
2014-04-22 12:49:48 +02:00
|
|
|
/* If we are in the general purpose register file,
|
|
|
|
we split to a sequence of comparison and store. */
|
|
|
|
if (GP_REGNUM_P (REGNO (operands[0]))
|
|
|
|
&& GP_REGNUM_P (REGNO (operands[1])))
|
|
|
|
{
|
decl.c, [...]: Remove redundant enum from machine_mode.
gcc/ada/
* gcc-interface/decl.c, gcc-interface/gigi.h, gcc-interface/misc.c,
gcc-interface/trans.c, gcc-interface/utils.c, gcc-interface/utils2.c:
Remove redundant enum from machine_mode.
gcc/c-family/
* c-common.c, c-common.h, c-cppbuiltin.c, c-lex.c: Remove redundant
enum from machine_mode.
gcc/c/
* c-decl.c, c-tree.h, c-typeck.c: Remove redundant enum from
machine_mode.
gcc/cp/
* constexpr.c: Remove redundant enum from machine_mode.
gcc/fortran/
* trans-types.c, trans-types.h: Remove redundant enum from
machine_mode.
gcc/go/
* go-lang.c: Remove redundant enum from machine_mode.
gcc/java/
* builtins.c, java-tree.h, typeck.c: Remove redundant enum from
machine_mode.
gcc/lto/
* lto-lang.c: Remove redundant enum from machine_mode.
gcc/
* addresses.h, alias.c, asan.c, auto-inc-dec.c, bt-load.c, builtins.c,
builtins.h, caller-save.c, calls.c, calls.h, cfgexpand.c, cfgloop.h,
cfgrtl.c, combine.c, compare-elim.c, config/aarch64/aarch64-builtins.c,
config/aarch64/aarch64-protos.h, config/aarch64/aarch64-simd.md,
config/aarch64/aarch64.c, config/aarch64/aarch64.h,
config/aarch64/aarch64.md, config/alpha/alpha-protos.h,
config/alpha/alpha.c, config/arc/arc-protos.h, config/arc/arc.c,
config/arc/arc.h, config/arc/predicates.md,
config/arm/aarch-common-protos.h, config/arm/aarch-common.c,
config/arm/arm-protos.h, config/arm/arm.c, config/arm/arm.h,
config/arm/arm.md, config/arm/neon.md, config/arm/thumb2.md,
config/avr/avr-log.c, config/avr/avr-protos.h, config/avr/avr.c,
config/avr/avr.md, config/bfin/bfin-protos.h, config/bfin/bfin.c,
config/c6x/c6x-protos.h, config/c6x/c6x.c, config/c6x/c6x.md,
config/cr16/cr16-protos.h, config/cr16/cr16.c,
config/cris/cris-protos.h, config/cris/cris.c, config/cris/cris.md,
config/darwin-protos.h, config/darwin.c,
config/epiphany/epiphany-protos.h, config/epiphany/epiphany.c,
config/epiphany/epiphany.md, config/fr30/fr30.c,
config/frv/frv-protos.h, config/frv/frv.c, config/frv/predicates.md,
config/h8300/h8300-protos.h, config/h8300/h8300.c,
config/i386/i386-builtin-types.awk, config/i386/i386-protos.h,
config/i386/i386.c, config/i386/i386.md, config/i386/predicates.md,
config/i386/sse.md, config/i386/sync.md, config/ia64/ia64-protos.h,
config/ia64/ia64.c, config/iq2000/iq2000-protos.h,
config/iq2000/iq2000.c, config/iq2000/iq2000.md,
config/lm32/lm32-protos.h, config/lm32/lm32.c,
config/m32c/m32c-protos.h, config/m32c/m32c.c,
config/m32r/m32r-protos.h, config/m32r/m32r.c,
config/m68k/m68k-protos.h, config/m68k/m68k.c,
config/mcore/mcore-protos.h, config/mcore/mcore.c,
config/mcore/mcore.md, config/mep/mep-protos.h, config/mep/mep.c,
config/microblaze/microblaze-protos.h, config/microblaze/microblaze.c,
config/mips/mips-protos.h, config/mips/mips.c,
config/mmix/mmix-protos.h, config/mmix/mmix.c,
config/mn10300/mn10300-protos.h, config/mn10300/mn10300.c,
config/moxie/moxie.c, config/msp430/msp430-protos.h,
config/msp430/msp430.c, config/nds32/nds32-cost.c,
config/nds32/nds32-intrinsic.c, config/nds32/nds32-md-auxiliary.c,
config/nds32/nds32-protos.h, config/nds32/nds32.c,
config/nios2/nios2-protos.h, config/nios2/nios2.c,
config/pa/pa-protos.h, config/pa/pa.c, config/pdp11/pdp11-protos.h,
config/pdp11/pdp11.c, config/rl78/rl78-protos.h, config/rl78/rl78.c,
config/rs6000/altivec.md, config/rs6000/rs6000-c.c,
config/rs6000/rs6000-protos.h, config/rs6000/rs6000.c,
config/rs6000/rs6000.h, config/rx/rx-protos.h, config/rx/rx.c,
config/s390/predicates.md, config/s390/s390-protos.h,
config/s390/s390.c, config/s390/s390.h, config/s390/s390.md,
config/sh/predicates.md, config/sh/sh-protos.h, config/sh/sh.c,
config/sh/sh.md, config/sparc/predicates.md,
config/sparc/sparc-protos.h, config/sparc/sparc.c,
config/sparc/sparc.md, config/spu/spu-protos.h, config/spu/spu.c,
config/stormy16/stormy16-protos.h, config/stormy16/stormy16.c,
config/tilegx/tilegx-protos.h, config/tilegx/tilegx.c,
config/tilegx/tilegx.md, config/tilepro/tilepro-protos.h,
config/tilepro/tilepro.c, config/v850/v850-protos.h,
config/v850/v850.c, config/v850/v850.md, config/vax/vax-protos.h,
config/vax/vax.c, config/vms/vms-c.c, config/xtensa/xtensa-protos.h,
config/xtensa/xtensa.c, coverage.c, cprop.c, cse.c, cselib.c, cselib.h,
dbxout.c, ddg.c, df-problems.c, dfp.c, dfp.h, doc/md.texi,
doc/rtl.texi, doc/tm.texi, doc/tm.texi.in, dojump.c, dse.c,
dwarf2cfi.c, dwarf2out.c, dwarf2out.h, emit-rtl.c, emit-rtl.h,
except.c, explow.c, expmed.c, expmed.h, expr.c, expr.h, final.c,
fixed-value.c, fixed-value.h, fold-const.c, function.c, function.h,
fwprop.c, gcse.c, gengenrtl.c, genmodes.c, genopinit.c, genoutput.c,
genpreds.c, genrecog.c, gensupport.c, gimple-ssa-strength-reduction.c,
graphite-clast-to-gimple.c, haifa-sched.c, hooks.c, hooks.h, ifcvt.c,
internal-fn.c, ira-build.c, ira-color.c, ira-conflicts.c, ira-costs.c,
ira-emit.c, ira-int.h, ira-lives.c, ira.c, ira.h, jump.c, langhooks.h,
libfuncs.h, lists.c, loop-doloop.c, loop-invariant.c, loop-iv.c,
loop-unroll.c, lower-subreg.c, lower-subreg.h, lra-assigns.c,
lra-constraints.c, lra-eliminations.c, lra-int.h, lra-lives.c,
lra-spills.c, lra.c, lra.h, machmode.h, omp-low.c, optabs.c, optabs.h,
output.h, postreload.c, print-tree.c, read-rtl.c, real.c, real.h,
recog.c, recog.h, ree.c, reg-stack.c, regcprop.c, reginfo.c,
regrename.c, regs.h, reload.c, reload.h, reload1.c, rtl.c, rtl.h,
rtlanal.c, rtlhash.c, rtlhooks-def.h, rtlhooks.c, sched-deps.c,
sel-sched-dump.c, sel-sched-ir.c, sel-sched-ir.h, sel-sched.c,
simplify-rtx.c, stmt.c, stor-layout.c, stor-layout.h, target.def,
targhooks.c, targhooks.h, tree-affine.c, tree-call-cdce.c,
tree-complex.c, tree-data-ref.c, tree-dfa.c, tree-if-conv.c,
tree-inline.c, tree-outof-ssa.c, tree-scalar-evolution.c,
tree-ssa-address.c, tree-ssa-ccp.c, tree-ssa-loop-ivopts.c,
tree-ssa-loop-ivopts.h, tree-ssa-loop-manip.c,
tree-ssa-loop-prefetch.c, tree-ssa-math-opts.c, tree-ssa-reassoc.c,
tree-ssa-sccvn.c, tree-streamer-in.c, tree-switch-conversion.c,
tree-vect-data-refs.c, tree-vect-generic.c, tree-vect-loop.c,
tree-vect-patterns.c, tree-vect-slp.c, tree-vect-stmts.c,
tree-vrp.c, tree.c, tree.h, tsan.c, ubsan.c, valtrack.c,
var-tracking.c, varasm.c: Remove redundant enum from
machine_mode.
gcc/
* gengtype.c (main): Treat machine_mode as a scalar typedef.
* genmodes.c (emit_insn_modes_h): Hide inline functions if
USED_FOR_TARGET.
From-SVN: r216834
2014-10-29 13:02:45 +01:00
|
|
|
machine_mode mode = CCmode;
|
2014-04-22 12:49:48 +02:00
|
|
|
rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
|
|
|
|
rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
|
|
|
|
emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
/* Otherwise, we expand to a similar pattern which does not
|
|
|
|
clobber CC_REGNUM. */
|
2013-05-01 12:33:57 +02:00
|
|
|
}
|
2014-04-22 12:49:48 +02:00
|
|
|
[(set_attr "type" "neon_compare,multiple")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_cm<optab>di"
|
|
|
|
[(set (match_operand:DI 0 "register_operand" "=w")
|
|
|
|
(neg:DI
|
|
|
|
(UCOMPARISONS:DI
|
|
|
|
(match_operand:DI 1 "register_operand" "w")
|
|
|
|
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
|
|
|
|
)))]
|
|
|
|
"TARGET_SIMD && reload_completed"
|
|
|
|
"cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
|
|
|
|
[(set_attr "type" "neon_compare")]
|
2013-05-01 12:33:57 +02:00
|
|
|
)
|
2013-01-08 15:57:33 +01:00
|
|
|
|
2013-05-01 12:33:57 +02:00
|
|
|
;; cmtst
|
|
|
|
|
2014-09-05 12:50:04 +02:00
|
|
|
;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
|
2016-08-10 17:34:23 +02:00
|
|
|
;; we don't have any insns using ne, and aarch64_vcond outputs
|
2014-09-05 12:50:04 +02:00
|
|
|
;; not (neg (eq (and x y) 0))
|
|
|
|
;; which is rewritten by simplify_rtx as
|
|
|
|
;; plus (eq (and x y) 0) -1.
|
|
|
|
|
2013-05-01 12:33:57 +02:00
|
|
|
(define_insn "aarch64_cmtst<mode>"
|
2017-08-31 11:52:38 +02:00
|
|
|
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
|
|
|
|
(plus:<V_INT_EQUIV>
|
|
|
|
(eq:<V_INT_EQUIV>
|
[AArch64] Remove/merge redundant iterators
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, orn<mode>3,
bic<mode>3, add<mode>3, sub<mode>3, neg<mode>2, abs<mode>2, and<mode>3,
ior<mode>3, xor<mode>3, one_cmpl<mode>2,
aarch64_simd_lshr<mode> ,arch64_simd_ashr<mode>,
aarch64_simd_imm_shl<mode>, aarch64_simd_reg_sshl<mode>,
aarch64_simd_reg_shl<mode>_unsigned, aarch64_simd_reg_shr<mode>_signed,
ashl<mode>3, lshr<mode>3, ashr<mode>3, vashl<mode>3,
reduc_plus_scal_<mode>, aarch64_vcond_internal<mode><mode>,
vcondu<mode><mode>, aarch64_cm<optab><mode>, aarch64_cmtst<mode>):
Change VDQ to VDQ_I.
(mul<mode>3): Change VDQM to VDQ_BHSI.
(aarch64_simd_vec_set<mode>,vashr<mode>3, vlshr<mode>3, vec_set<mode>,
aarch64_mla<mode>, aarch64_mls<mode>, <su><maxmin><mode>3,
aarch64_<sur>h<addsub><mode>): Change VQ_S to VDQ_BHSI.
(*aarch64_<su>mlal<mode>, *aarch64_<su>mlsl<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>,
aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>, aarch64_<sur>shll_n<mode>):
Change VDW to VD_BHSI.
(*aarch64_combinez<mode>, *aarch64_combinez_be<mode>):
Change VDIC to VD_BHSI.
* config/aarch64/aarch64-simd-builtins.def (saddl, uaddl, ssubl, usubl,
saddw, uaddw, ssubw, usubw, shadd, uhadd, srhadd, urhadd, sshll_n,
ushll_n): Change BUILTIN_VDW to BUILTIN_VD_BHSI.
* config/aarch64/iterators.md (SDQ_I, VDQ, VQ_S, VSDQ_I_BHSI, VDQM, VDW,
VDIC, VDQQHS): Remove.
(Vwtype): Update comment (changing VDW to VD_BHSI).
From-SVN: r218310
2014-12-03 13:12:07 +01:00
|
|
|
(and:VDQ_I
|
|
|
|
(match_operand:VDQ_I 1 "register_operand" "w")
|
|
|
|
(match_operand:VDQ_I 2 "register_operand" "w"))
|
|
|
|
(match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
|
2017-08-31 11:52:38 +02:00
|
|
|
(match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
|
2014-09-05 12:50:04 +02:00
|
|
|
]
|
2013-05-01 12:33:57 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_tst<q>")]
|
2013-05-01 12:33:57 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn_and_split "aarch64_cmtstdi"
|
|
|
|
[(set (match_operand:DI 0 "register_operand" "=w,r")
|
|
|
|
(neg:DI
|
|
|
|
(ne:DI
|
|
|
|
(and:DI
|
|
|
|
(match_operand:DI 1 "register_operand" "w,r")
|
|
|
|
(match_operand:DI 2 "register_operand" "w,r"))
|
2013-05-23 12:18:19 +02:00
|
|
|
(const_int 0))))
|
|
|
|
(clobber (reg:CC CC_REGNUM))]
|
2013-05-01 12:33:57 +02:00
|
|
|
"TARGET_SIMD"
|
2014-04-22 12:49:48 +02:00
|
|
|
"#"
|
2017-12-05 15:40:37 +01:00
|
|
|
"&& reload_completed"
|
2014-04-22 12:49:48 +02:00
|
|
|
[(set (match_operand:DI 0 "register_operand")
|
|
|
|
(neg:DI
|
|
|
|
(ne:DI
|
|
|
|
(and:DI
|
|
|
|
(match_operand:DI 1 "register_operand")
|
|
|
|
(match_operand:DI 2 "register_operand"))
|
|
|
|
(const_int 0))))]
|
2013-05-01 12:33:57 +02:00
|
|
|
{
|
2014-04-22 12:49:48 +02:00
|
|
|
/* If we are in the general purpose register file,
|
|
|
|
we split to a sequence of comparison and store. */
|
|
|
|
if (GP_REGNUM_P (REGNO (operands[0]))
|
|
|
|
&& GP_REGNUM_P (REGNO (operands[1])))
|
|
|
|
{
|
|
|
|
rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
|
decl.c, [...]: Remove redundant enum from machine_mode.
gcc/ada/
* gcc-interface/decl.c, gcc-interface/gigi.h, gcc-interface/misc.c,
gcc-interface/trans.c, gcc-interface/utils.c, gcc-interface/utils2.c:
Remove redundant enum from machine_mode.
gcc/c-family/
* c-common.c, c-common.h, c-cppbuiltin.c, c-lex.c: Remove redundant
enum from machine_mode.
gcc/c/
* c-decl.c, c-tree.h, c-typeck.c: Remove redundant enum from
machine_mode.
gcc/cp/
* constexpr.c: Remove redundant enum from machine_mode.
gcc/fortran/
* trans-types.c, trans-types.h: Remove redundant enum from
machine_mode.
gcc/go/
* go-lang.c: Remove redundant enum from machine_mode.
gcc/java/
* builtins.c, java-tree.h, typeck.c: Remove redundant enum from
machine_mode.
gcc/lto/
* lto-lang.c: Remove redundant enum from machine_mode.
gcc/
* addresses.h, alias.c, asan.c, auto-inc-dec.c, bt-load.c, builtins.c,
builtins.h, caller-save.c, calls.c, calls.h, cfgexpand.c, cfgloop.h,
cfgrtl.c, combine.c, compare-elim.c, config/aarch64/aarch64-builtins.c,
config/aarch64/aarch64-protos.h, config/aarch64/aarch64-simd.md,
config/aarch64/aarch64.c, config/aarch64/aarch64.h,
config/aarch64/aarch64.md, config/alpha/alpha-protos.h,
config/alpha/alpha.c, config/arc/arc-protos.h, config/arc/arc.c,
config/arc/arc.h, config/arc/predicates.md,
config/arm/aarch-common-protos.h, config/arm/aarch-common.c,
config/arm/arm-protos.h, config/arm/arm.c, config/arm/arm.h,
config/arm/arm.md, config/arm/neon.md, config/arm/thumb2.md,
config/avr/avr-log.c, config/avr/avr-protos.h, config/avr/avr.c,
config/avr/avr.md, config/bfin/bfin-protos.h, config/bfin/bfin.c,
config/c6x/c6x-protos.h, config/c6x/c6x.c, config/c6x/c6x.md,
config/cr16/cr16-protos.h, config/cr16/cr16.c,
config/cris/cris-protos.h, config/cris/cris.c, config/cris/cris.md,
config/darwin-protos.h, config/darwin.c,
config/epiphany/epiphany-protos.h, config/epiphany/epiphany.c,
config/epiphany/epiphany.md, config/fr30/fr30.c,
config/frv/frv-protos.h, config/frv/frv.c, config/frv/predicates.md,
config/h8300/h8300-protos.h, config/h8300/h8300.c,
config/i386/i386-builtin-types.awk, config/i386/i386-protos.h,
config/i386/i386.c, config/i386/i386.md, config/i386/predicates.md,
config/i386/sse.md, config/i386/sync.md, config/ia64/ia64-protos.h,
config/ia64/ia64.c, config/iq2000/iq2000-protos.h,
config/iq2000/iq2000.c, config/iq2000/iq2000.md,
config/lm32/lm32-protos.h, config/lm32/lm32.c,
config/m32c/m32c-protos.h, config/m32c/m32c.c,
config/m32r/m32r-protos.h, config/m32r/m32r.c,
config/m68k/m68k-protos.h, config/m68k/m68k.c,
config/mcore/mcore-protos.h, config/mcore/mcore.c,
config/mcore/mcore.md, config/mep/mep-protos.h, config/mep/mep.c,
config/microblaze/microblaze-protos.h, config/microblaze/microblaze.c,
config/mips/mips-protos.h, config/mips/mips.c,
config/mmix/mmix-protos.h, config/mmix/mmix.c,
config/mn10300/mn10300-protos.h, config/mn10300/mn10300.c,
config/moxie/moxie.c, config/msp430/msp430-protos.h,
config/msp430/msp430.c, config/nds32/nds32-cost.c,
config/nds32/nds32-intrinsic.c, config/nds32/nds32-md-auxiliary.c,
config/nds32/nds32-protos.h, config/nds32/nds32.c,
config/nios2/nios2-protos.h, config/nios2/nios2.c,
config/pa/pa-protos.h, config/pa/pa.c, config/pdp11/pdp11-protos.h,
config/pdp11/pdp11.c, config/rl78/rl78-protos.h, config/rl78/rl78.c,
config/rs6000/altivec.md, config/rs6000/rs6000-c.c,
config/rs6000/rs6000-protos.h, config/rs6000/rs6000.c,
config/rs6000/rs6000.h, config/rx/rx-protos.h, config/rx/rx.c,
config/s390/predicates.md, config/s390/s390-protos.h,
config/s390/s390.c, config/s390/s390.h, config/s390/s390.md,
config/sh/predicates.md, config/sh/sh-protos.h, config/sh/sh.c,
config/sh/sh.md, config/sparc/predicates.md,
config/sparc/sparc-protos.h, config/sparc/sparc.c,
config/sparc/sparc.md, config/spu/spu-protos.h, config/spu/spu.c,
config/stormy16/stormy16-protos.h, config/stormy16/stormy16.c,
config/tilegx/tilegx-protos.h, config/tilegx/tilegx.c,
config/tilegx/tilegx.md, config/tilepro/tilepro-protos.h,
config/tilepro/tilepro.c, config/v850/v850-protos.h,
config/v850/v850.c, config/v850/v850.md, config/vax/vax-protos.h,
config/vax/vax.c, config/vms/vms-c.c, config/xtensa/xtensa-protos.h,
config/xtensa/xtensa.c, coverage.c, cprop.c, cse.c, cselib.c, cselib.h,
dbxout.c, ddg.c, df-problems.c, dfp.c, dfp.h, doc/md.texi,
doc/rtl.texi, doc/tm.texi, doc/tm.texi.in, dojump.c, dse.c,
dwarf2cfi.c, dwarf2out.c, dwarf2out.h, emit-rtl.c, emit-rtl.h,
except.c, explow.c, expmed.c, expmed.h, expr.c, expr.h, final.c,
fixed-value.c, fixed-value.h, fold-const.c, function.c, function.h,
fwprop.c, gcse.c, gengenrtl.c, genmodes.c, genopinit.c, genoutput.c,
genpreds.c, genrecog.c, gensupport.c, gimple-ssa-strength-reduction.c,
graphite-clast-to-gimple.c, haifa-sched.c, hooks.c, hooks.h, ifcvt.c,
internal-fn.c, ira-build.c, ira-color.c, ira-conflicts.c, ira-costs.c,
ira-emit.c, ira-int.h, ira-lives.c, ira.c, ira.h, jump.c, langhooks.h,
libfuncs.h, lists.c, loop-doloop.c, loop-invariant.c, loop-iv.c,
loop-unroll.c, lower-subreg.c, lower-subreg.h, lra-assigns.c,
lra-constraints.c, lra-eliminations.c, lra-int.h, lra-lives.c,
lra-spills.c, lra.c, lra.h, machmode.h, omp-low.c, optabs.c, optabs.h,
output.h, postreload.c, print-tree.c, read-rtl.c, real.c, real.h,
recog.c, recog.h, ree.c, reg-stack.c, regcprop.c, reginfo.c,
regrename.c, regs.h, reload.c, reload.h, reload1.c, rtl.c, rtl.h,
rtlanal.c, rtlhash.c, rtlhooks-def.h, rtlhooks.c, sched-deps.c,
sel-sched-dump.c, sel-sched-ir.c, sel-sched-ir.h, sel-sched.c,
simplify-rtx.c, stmt.c, stor-layout.c, stor-layout.h, target.def,
targhooks.c, targhooks.h, tree-affine.c, tree-call-cdce.c,
tree-complex.c, tree-data-ref.c, tree-dfa.c, tree-if-conv.c,
tree-inline.c, tree-outof-ssa.c, tree-scalar-evolution.c,
tree-ssa-address.c, tree-ssa-ccp.c, tree-ssa-loop-ivopts.c,
tree-ssa-loop-ivopts.h, tree-ssa-loop-manip.c,
tree-ssa-loop-prefetch.c, tree-ssa-math-opts.c, tree-ssa-reassoc.c,
tree-ssa-sccvn.c, tree-streamer-in.c, tree-switch-conversion.c,
tree-vect-data-refs.c, tree-vect-generic.c, tree-vect-loop.c,
tree-vect-patterns.c, tree-vect-slp.c, tree-vect-stmts.c,
tree-vrp.c, tree.c, tree.h, tsan.c, ubsan.c, valtrack.c,
var-tracking.c, varasm.c: Remove redundant enum from
machine_mode.
gcc/
* gengtype.c (main): Treat machine_mode as a scalar typedef.
* genmodes.c (emit_insn_modes_h): Hide inline functions if
USED_FOR_TARGET.
From-SVN: r216834
2014-10-29 13:02:45 +01:00
|
|
|
machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
|
2014-04-22 12:49:48 +02:00
|
|
|
rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
|
|
|
|
rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
|
|
|
|
emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
/* Otherwise, we expand to a similar pattern which does not
|
|
|
|
clobber CC_REGNUM. */
|
2013-05-01 12:33:57 +02:00
|
|
|
}
|
2014-04-22 12:49:48 +02:00
|
|
|
[(set_attr "type" "neon_tst,multiple")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_cmtstdi"
|
|
|
|
[(set (match_operand:DI 0 "register_operand" "=w")
|
|
|
|
(neg:DI
|
|
|
|
(ne:DI
|
|
|
|
(and:DI
|
|
|
|
(match_operand:DI 1 "register_operand" "w")
|
|
|
|
(match_operand:DI 2 "register_operand" "w"))
|
|
|
|
(const_int 0))))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"cmtst\t%d0, %d1, %d2"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_tst")]
|
2013-05-01 12:33:57 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; fcm(eq|ge|gt|le|lt)
|
|
|
|
|
|
|
|
(define_insn "aarch64_cm<optab><mode>"
|
2017-08-31 11:52:38 +02:00
|
|
|
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
|
|
|
|
(neg:<V_INT_EQUIV>
|
|
|
|
(COMPARISONS:<V_INT_EQUIV>
|
[AArch64][7/10] ARMv8.2-A FP16 one operand scalar intrinsics
gcc/
* config.gcc (aarch64*-*-*): Install arm_fp16.h.
* config/aarch64/aarch64-builtins.c (hi_UP): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_frsqrte<mode>): Extend to HF
mode.
(aarch64_frecp<FRECP:frecp_suffix><mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.md (<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(abs<mode>2): Likewise.
(<optab><mode>hf2): New pattern for HF mode.
(<optab>hihf2): Likewise.
* config/aarch64/arm_neon.h: Include arm_fp16.h.
* config/aarch64/iterators.md (GPF_F16, GPI_F16, VHSDF_HSDF): New.
(w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE):
Support HF mode.
* config/aarch64/arm_fp16.h: New file.
(vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16,
vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16,
vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16,
vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16,
vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16,
vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16,
vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16,
vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16,
vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16,
vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16,
vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16,
vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16,
vsqrth_f16): New.
From-SVN: r238722
2016-07-25 18:00:28 +02:00
|
|
|
(match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
|
|
|
|
(match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
|
2013-05-01 12:33:57 +02:00
|
|
|
)))]
|
2013-01-08 15:57:33 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"@
|
2013-05-01 12:33:57 +02:00
|
|
|
fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
|
|
|
|
fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set_attr "type" "neon_fp_compare_<stype><q>")]
|
2013-01-08 15:57:33 +01:00
|
|
|
)
|
|
|
|
|
2013-05-01 12:46:00 +02:00
|
|
|
;; fac(ge|gt)
|
|
|
|
;; Note we can also handle what would be fac(le|lt) by
|
|
|
|
;; generating fac(ge|gt).
|
|
|
|
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
(define_insn "aarch64_fac<optab><mode>"
|
2017-08-31 11:52:38 +02:00
|
|
|
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
|
|
|
|
(neg:<V_INT_EQUIV>
|
|
|
|
(FAC_COMPARISONS:<V_INT_EQUIV>
|
[AArch64][8/10] ARMv8.2-A FP16 two operands scalar intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3):
New.
(<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise.
(add<mode>3): Likewise.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
(<fmaxmin><mode>3): Extend to HF.
* config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_fac<optab><mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn>hfhi3): New.
(<FCVT_FIXED2F:fcvt_fixed_insn>hihf3): Likewise.
* config/aarch64/iterators.md (VHSDF_SDF): Delete.
(VSDQ_HSDI): Support HI.
(fcvt_target, FCVT_TARGET): Likewise.
* config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16,
vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16,
vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32,
vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64,
vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16,
vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16,
vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16,
vrsqrtsh_f16): New.
From-SVN: r238723
2016-07-25 18:10:52 +02:00
|
|
|
(abs:VHSDF_HSDF
|
|
|
|
(match_operand:VHSDF_HSDF 1 "register_operand" "w"))
|
|
|
|
(abs:VHSDF_HSDF
|
|
|
|
(match_operand:VHSDF_HSDF 2 "register_operand" "w"))
|
2013-05-01 12:46:00 +02:00
|
|
|
)))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_compare_<stype><q>")]
|
2013-05-01 12:46:00 +02:00
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
;; addp
|
|
|
|
|
|
|
|
(define_insn "aarch64_addp<mode>"
|
|
|
|
[(set (match_operand:VD_BHSI 0 "register_operand" "=w")
|
|
|
|
(unspec:VD_BHSI
|
|
|
|
[(match_operand:VD_BHSI 1 "register_operand" "w")
|
|
|
|
(match_operand:VD_BHSI 2 "register_operand" "w")]
|
|
|
|
UNSPEC_ADDP))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_reduc_add<q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_addpdi"
|
|
|
|
[(set (match_operand:DI 0 "register_operand" "=w")
|
|
|
|
(unspec:DI
|
|
|
|
[(match_operand:V2DI 1 "register_operand" "w")]
|
|
|
|
UNSPEC_ADDP))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"addp\t%d0, %1.2d"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_reduc_add")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; sqrt
|
|
|
|
|
2016-06-13 21:02:56 +02:00
|
|
|
(define_expand "sqrt<mode>2"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
|
2016-06-13 21:02:56 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "*sqrt<mode>2"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"fsqrt\\t%0.<Vtype>, %1.<Vtype>"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set_attr "type" "neon_fp_sqrt_<stype><q>")]
|
2012-10-23 19:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
;; Patterns for vector struct loads and stores.
|
|
|
|
|
2015-01-21 18:53:44 +01:00
|
|
|
(define_insn "aarch64_simd_ld2<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:OI 0 "register_operand" "=w")
|
|
|
|
(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD2))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_load2_2reg<q>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
From-SVN: r216630
2014-10-24 12:53:08 +02:00
|
|
|
(define_insn "aarch64_simd_ld2r<mode>"
|
|
|
|
[(set (match_operand:OI 0 "register_operand" "=w")
|
2015-09-15 14:48:15 +02:00
|
|
|
(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
From-SVN: r216630
2014-10-24 12:53:08 +02:00
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
|
|
|
|
UNSPEC_LD2_DUP))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
|
|
|
|
[(set_attr "type" "neon_load2_all_lanes<q>")]
|
|
|
|
)
|
|
|
|
|
2014-10-24 16:58:51 +02:00
|
|
|
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
|
|
|
|
[(set (match_operand:OI 0 "register_operand" "=w")
|
2015-09-15 14:48:15 +02:00
|
|
|
(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
2014-10-24 16:58:51 +02:00
|
|
|
(match_operand:OI 2 "register_operand" "0")
|
|
|
|
(match_operand:SI 3 "immediate_operand" "i")
|
2015-07-22 12:44:16 +02:00
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
|
2014-10-24 16:58:51 +02:00
|
|
|
UNSPEC_LD2_LANE))]
|
|
|
|
"TARGET_SIMD"
|
2015-07-22 12:44:16 +02:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
|
2015-07-22 12:44:16 +02:00
|
|
|
return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
|
|
|
|
}
|
2014-10-24 16:58:51 +02:00
|
|
|
[(set_attr "type" "neon_load2_one_lane")]
|
|
|
|
)
|
|
|
|
|
2015-01-21 18:53:44 +01:00
|
|
|
(define_expand "vec_load_lanesoi<mode>"
|
|
|
|
[(set (match_operand:OI 0 "register_operand" "=w")
|
|
|
|
(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD2))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (OImode);
|
2017-11-06 21:02:27 +01:00
|
|
|
rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
|
2015-01-21 18:53:44 +01:00
|
|
|
emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
|
|
|
|
emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_st2<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:OI [(match_operand:OI 1 "register_operand" "w")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST2))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_store2_2reg<q>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
2015-04-29 18:10:27 +02:00
|
|
|
;; RTL uses GCC vector extension indices, so flip only for assembly.
|
2015-09-15 13:39:12 +02:00
|
|
|
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
|
2015-09-15 14:48:15 +02:00
|
|
|
[(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
|
2015-07-22 12:44:16 +02:00
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
|
2014-04-28 23:05:51 +02:00
|
|
|
(match_operand:SI 2 "immediate_operand" "i")]
|
2015-07-22 12:44:16 +02:00
|
|
|
UNSPEC_ST2_LANE))]
|
2014-04-28 23:05:51 +02:00
|
|
|
"TARGET_SIMD"
|
2015-04-29 18:10:27 +02:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2015-04-29 18:10:27 +02:00
|
|
|
return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
|
|
|
|
}
|
2015-07-22 12:56:40 +02:00
|
|
|
[(set_attr "type" "neon_store2_one_lane<q>")]
|
2014-04-28 23:05:51 +02:00
|
|
|
)
|
|
|
|
|
2015-01-21 18:53:44 +01:00
|
|
|
(define_expand "vec_store_lanesoi<mode>"
|
|
|
|
[(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:OI [(match_operand:OI 1 "register_operand" "w")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST2))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (OImode);
|
2017-11-06 21:02:27 +01:00
|
|
|
rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
|
2015-01-21 18:53:44 +01:00
|
|
|
emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
|
|
|
|
emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_ld3<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:CI 0 "register_operand" "=w")
|
|
|
|
(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD3))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_load3_3reg<q>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
From-SVN: r216630
2014-10-24 12:53:08 +02:00
|
|
|
(define_insn "aarch64_simd_ld3r<mode>"
|
|
|
|
[(set (match_operand:CI 0 "register_operand" "=w")
|
2015-09-15 14:28:20 +02:00
|
|
|
(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
From-SVN: r216630
2014-10-24 12:53:08 +02:00
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
|
|
|
|
UNSPEC_LD3_DUP))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
|
|
|
|
[(set_attr "type" "neon_load3_all_lanes<q>")]
|
|
|
|
)
|
|
|
|
|
2014-10-24 16:58:51 +02:00
|
|
|
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
|
|
|
|
[(set (match_operand:CI 0 "register_operand" "=w")
|
2015-09-15 14:28:20 +02:00
|
|
|
(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
2014-10-24 16:58:51 +02:00
|
|
|
(match_operand:CI 2 "register_operand" "0")
|
|
|
|
(match_operand:SI 3 "immediate_operand" "i")
|
2015-07-22 12:44:16 +02:00
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
2014-10-24 16:58:51 +02:00
|
|
|
UNSPEC_LD3_LANE))]
|
|
|
|
"TARGET_SIMD"
|
2015-07-22 12:44:16 +02:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
|
2015-07-22 12:44:16 +02:00
|
|
|
return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
|
|
|
|
}
|
2014-10-24 16:58:51 +02:00
|
|
|
[(set_attr "type" "neon_load3_one_lane")]
|
|
|
|
)
|
|
|
|
|
2015-01-21 18:53:44 +01:00
|
|
|
(define_expand "vec_load_lanesci<mode>"
|
|
|
|
[(set (match_operand:CI 0 "register_operand" "=w")
|
|
|
|
(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD3))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (CImode);
|
2017-11-06 21:02:27 +01:00
|
|
|
rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
|
2015-01-21 18:53:44 +01:00
|
|
|
emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
|
|
|
|
emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_st3<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:CI [(match_operand:CI 1 "register_operand" "w")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST3))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_store3_3reg<q>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
2015-04-29 18:10:27 +02:00
|
|
|
;; RTL uses GCC vector extension indices, so flip only for assembly.
|
2015-09-15 13:39:12 +02:00
|
|
|
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
|
2015-09-15 14:28:20 +02:00
|
|
|
[(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
|
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
|
|
|
|
(match_operand:SI 2 "immediate_operand" "i")]
|
|
|
|
UNSPEC_ST3_LANE))]
|
2014-04-28 23:05:51 +02:00
|
|
|
"TARGET_SIMD"
|
2015-04-29 18:10:27 +02:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2015-04-29 18:10:27 +02:00
|
|
|
return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
|
|
|
|
}
|
2014-04-28 23:05:51 +02:00
|
|
|
[(set_attr "type" "neon_store3_one_lane<q>")]
|
|
|
|
)
|
|
|
|
|
2015-01-21 18:53:44 +01:00
|
|
|
(define_expand "vec_store_lanesci<mode>"
|
|
|
|
[(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:CI [(match_operand:CI 1 "register_operand" "w")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST3))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (CImode);
|
2017-11-06 21:02:27 +01:00
|
|
|
rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
|
2015-01-21 18:53:44 +01:00
|
|
|
emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
|
|
|
|
emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_ld4<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:XI 0 "register_operand" "=w")
|
|
|
|
(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD4))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_load4_4reg<q>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
From-SVN: r216630
2014-10-24 12:53:08 +02:00
|
|
|
(define_insn "aarch64_simd_ld4r<mode>"
|
|
|
|
[(set (match_operand:XI 0 "register_operand" "=w")
|
2015-09-15 14:43:07 +02:00
|
|
|
(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
From-SVN: r216630
2014-10-24 12:53:08 +02:00
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
|
|
|
|
UNSPEC_LD4_DUP))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
|
|
|
|
[(set_attr "type" "neon_load4_all_lanes<q>")]
|
|
|
|
)
|
|
|
|
|
2014-10-24 16:58:51 +02:00
|
|
|
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
|
|
|
|
[(set (match_operand:XI 0 "register_operand" "=w")
|
2015-09-15 14:43:07 +02:00
|
|
|
(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
2014-10-24 16:58:51 +02:00
|
|
|
(match_operand:XI 2 "register_operand" "0")
|
|
|
|
(match_operand:SI 3 "immediate_operand" "i")
|
2015-07-22 12:44:16 +02:00
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
2014-10-24 16:58:51 +02:00
|
|
|
UNSPEC_LD4_LANE))]
|
|
|
|
"TARGET_SIMD"
|
2015-07-22 12:44:16 +02:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
|
2015-07-22 12:44:16 +02:00
|
|
|
return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
|
|
|
|
}
|
2014-10-24 16:58:51 +02:00
|
|
|
[(set_attr "type" "neon_load4_one_lane")]
|
|
|
|
)
|
|
|
|
|
2015-01-21 18:53:44 +01:00
|
|
|
(define_expand "vec_load_lanesxi<mode>"
|
|
|
|
[(set (match_operand:XI 0 "register_operand" "=w")
|
|
|
|
(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD4))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (XImode);
|
2017-11-06 21:02:27 +01:00
|
|
|
rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
|
2015-01-21 18:53:44 +01:00
|
|
|
emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
|
|
|
|
emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_st4<mode>"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:XI [(match_operand:XI 1 "register_operand" "w")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST4))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_store4_4reg<q>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
2015-04-29 18:10:27 +02:00
|
|
|
;; RTL uses GCC vector extension indices, so flip only for assembly.
|
2015-09-15 13:39:12 +02:00
|
|
|
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
|
2015-09-15 14:43:07 +02:00
|
|
|
[(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
|
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
|
|
|
|
(match_operand:SI 2 "immediate_operand" "i")]
|
|
|
|
UNSPEC_ST4_LANE))]
|
2014-04-28 23:05:51 +02:00
|
|
|
"TARGET_SIMD"
|
2015-04-29 18:10:27 +02:00
|
|
|
{
|
[AArch64] Add an endian_lane_rtx helper routine
Later patches turn the number of vector units into a poly_int.
We deliberately don't support applying GEN_INT to those (except
in target code that doesn't distinguish between poly_ints and normal
constants); gen_int_mode needs to be used instead.
This patch therefore replaces instances of:
GEN_INT (ENDIAN_LANE_N (builtin_mode, INTVAL (op[opc])))
with uses of a new endian_lane_rtx function.
2017-11-06 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-protos.h (aarch64_endian_lane_rtx): Declare.
* config/aarch64/aarch64.c (aarch64_endian_lane_rtx): New function.
* config/aarch64/aarch64.h (ENDIAN_LANE_N): Take the number
of units rather than the mode.
* config/aarch64/iterators.md (nunits): New mode attribute.
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
Use aarch64_endian_lane_rtx instead of GEN_INT (ENDIAN_LANE_N ...).
* config/aarch64/aarch64-simd.md (aarch64_dup_lane<mode>)
(aarch64_dup_lane_<vswap_width_name><mode>, *aarch64_mul3_elt<mode>)
(*aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_mul3_elt_to_64v2df, *aarch64_mla_elt<mode>): Likewise.
(*aarch64_mla_elt_<vswap_width_name><mode>, *aarch64_mls_elt<mode>)
(*aarch64_mls_elt_<vswap_width_name><mode>, *aarch64_fma4_elt<mode>)
(*aarch64_fma4_elt_<vswap_width_name><mode>):: Likewise.
(*aarch64_fma4_elt_to_64v2df, *aarch64_fnma4_elt<mode>): Likewise.
(*aarch64_fnma4_elt_<vswap_width_name><mode>): Likewise.
(*aarch64_fnma4_elt_to_64v2df, reduc_plus_scal_<mode>): Likewise.
(reduc_plus_scal_v4sf, reduc_<maxmin_uns>_scal_<mode>): Likewise.
(reduc_<maxmin_uns>_scal_<mode>): Likewise.
(*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
(*aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>, *aarch64_mulx_elt_<vswap_width_name><mode>)
(*aarch64_mulx_elt<mode>, *aarch64_vgetfmulx<mode>): Likewise.
(aarch64_sq<r>dmulh_lane<mode>, aarch64_sq<r>dmulh_laneq<mode>)
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Likewise.
(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise.
(aarch64_sqdmull_lane<mode>, aarch64_sqdmull_laneq<mode>): Likewise.
(aarch64_sqdmull2_lane<mode>_internal): Likewise.
(aarch64_sqdmull2_laneq<mode>_internal): Likewise.
(aarch64_vec_load_lanesoi_lane<mode>): Likewise.
(aarch64_vec_store_lanesoi_lane<mode>): Likewise.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_store_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_simd_vec_set<mode>): Update use of ENDIAN_LANE_N.
(aarch64_simd_vec_setv2di): Likewise.
Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r254466
2017-11-06 21:02:10 +01:00
|
|
|
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
|
2015-04-29 18:10:27 +02:00
|
|
|
return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
|
|
|
|
}
|
2014-04-28 23:05:51 +02:00
|
|
|
[(set_attr "type" "neon_store4_one_lane<q>")]
|
|
|
|
)
|
|
|
|
|
2015-01-21 18:53:44 +01:00
|
|
|
(define_expand "vec_store_lanesxi<mode>"
|
|
|
|
[(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:XI [(match_operand:XI 1 "register_operand" "w")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST4))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
{
|
|
|
|
rtx tmp = gen_reg_rtx (XImode);
|
2017-11-06 21:02:27 +01:00
|
|
|
rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
|
2015-01-21 18:53:44 +01:00
|
|
|
emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
|
|
|
|
emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn_and_split "aarch64_rev_reglist<mode>"
|
|
|
|
[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
|
|
|
|
(unspec:VSTRUCT
|
|
|
|
[(match_operand:VSTRUCT 1 "register_operand" "w")
|
|
|
|
(match_operand:V16QI 2 "register_operand" "w")]
|
|
|
|
UNSPEC_REV_REGLIST))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"#"
|
|
|
|
"&& reload_completed"
|
|
|
|
[(const_int 0)]
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
|
|
|
|
for (i = 0; i < nregs; i++)
|
|
|
|
{
|
|
|
|
rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
|
|
|
|
rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
|
|
|
|
emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
|
|
|
|
}
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_tbl1_q")
|
|
|
|
(set_attr "length" "<insn_count>")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
;; Reload patterns for AdvSIMD register list operands.
|
|
|
|
|
|
|
|
(define_expand "mov<mode>"
|
2015-01-21 18:53:31 +01:00
|
|
|
[(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
|
|
|
|
(match_operand:VSTRUCT 1 "general_operand" ""))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
if (can_create_pseudo_p ())
|
|
|
|
{
|
|
|
|
if (GET_CODE (operands[0]) != REG)
|
|
|
|
operands[1] = force_reg (<MODE>mode, operands[1]);
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "*aarch64_mov<mode>"
|
|
|
|
[(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
|
2015-01-21 18:53:31 +01:00
|
|
|
(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
|
|
|
|
"TARGET_SIMD && !BYTES_BIG_ENDIAN
|
2012-10-23 19:02:30 +02:00
|
|
|
&& (register_operand (operands[0], <MODE>mode)
|
|
|
|
|| register_operand (operands[1], <MODE>mode))"
|
2015-01-21 18:53:31 +01:00
|
|
|
"@
|
|
|
|
#
|
|
|
|
st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
|
|
|
|
ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
|
|
|
|
[(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
|
|
|
|
neon_load<nregs>_<nregs>reg_q")
|
2016-05-31 13:04:41 +02:00
|
|
|
(set_attr "length" "<insn_count>,4,4")]
|
2013-11-15 18:05:37 +01:00
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
2014-01-23 15:46:31 +01:00
|
|
|
(define_insn "aarch64_be_ld1<mode>"
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
[(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
|
|
|
|
(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
|
|
|
|
"aarch64_simd_struct_operand" "Utv")]
|
2014-01-23 15:46:31 +01:00
|
|
|
UNSPEC_LD1))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld1\\t{%0<Vmtype>}, %1"
|
|
|
|
[(set_attr "type" "neon_load1_1reg<q>")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_be_st1<mode>"
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
[(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
|
2014-01-23 15:46:31 +01:00
|
|
|
UNSPEC_ST1))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st1\\t{%1<Vmtype>}, %0"
|
|
|
|
[(set_attr "type" "neon_store1_1reg<q>")]
|
|
|
|
)
|
|
|
|
|
2015-01-21 18:53:31 +01:00
|
|
|
(define_insn "*aarch64_be_movoi"
|
|
|
|
[(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
|
|
|
|
(match_operand:OI 1 "general_operand" " w,w,m"))]
|
|
|
|
"TARGET_SIMD && BYTES_BIG_ENDIAN
|
|
|
|
&& (register_operand (operands[0], OImode)
|
|
|
|
|| register_operand (operands[1], OImode))"
|
|
|
|
"@
|
|
|
|
#
|
|
|
|
stp\\t%q1, %R1, %0
|
|
|
|
ldp\\t%q0, %R0, %1"
|
2015-09-29 18:30:45 +02:00
|
|
|
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
|
2016-05-31 13:04:41 +02:00
|
|
|
(set_attr "length" "8,4,4")]
|
2015-01-21 18:53:31 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_be_movci"
|
|
|
|
[(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
|
|
|
|
(match_operand:CI 1 "general_operand" " w,w,o"))]
|
|
|
|
"TARGET_SIMD && BYTES_BIG_ENDIAN
|
|
|
|
&& (register_operand (operands[0], CImode)
|
|
|
|
|| register_operand (operands[1], CImode))"
|
|
|
|
"#"
|
|
|
|
[(set_attr "type" "multiple")
|
2016-05-31 13:04:41 +02:00
|
|
|
(set_attr "length" "12,4,4")]
|
2015-01-21 18:53:31 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "*aarch64_be_movxi"
|
|
|
|
[(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
|
|
|
|
(match_operand:XI 1 "general_operand" " w,w,o"))]
|
|
|
|
"TARGET_SIMD && BYTES_BIG_ENDIAN
|
|
|
|
&& (register_operand (operands[0], XImode)
|
|
|
|
|| register_operand (operands[1], XImode))"
|
|
|
|
"#"
|
|
|
|
[(set_attr "type" "multiple")
|
2016-05-31 13:04:41 +02:00
|
|
|
(set_attr "length" "16,4,4")]
|
2015-01-21 18:53:31 +01:00
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_split
|
2015-01-21 18:53:31 +01:00
|
|
|
[(set (match_operand:OI 0 "register_operand")
|
|
|
|
(match_operand:OI 1 "register_operand"))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD && reload_completed"
|
2015-01-21 18:53:31 +01:00
|
|
|
[(const_int 0)]
|
2012-10-23 19:02:30 +02:00
|
|
|
{
|
2015-01-21 18:53:31 +01:00
|
|
|
aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
|
|
|
|
DONE;
|
2012-10-23 19:02:30 +02:00
|
|
|
})
|
|
|
|
|
|
|
|
(define_split
|
2015-01-21 18:53:31 +01:00
|
|
|
[(set (match_operand:CI 0 "nonimmediate_operand")
|
|
|
|
(match_operand:CI 1 "general_operand"))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD && reload_completed"
|
2015-01-21 18:53:31 +01:00
|
|
|
[(const_int 0)]
|
2012-10-23 19:02:30 +02:00
|
|
|
{
|
2015-01-21 18:53:31 +01:00
|
|
|
if (register_operand (operands[0], CImode)
|
|
|
|
&& register_operand (operands[1], CImode))
|
|
|
|
{
|
|
|
|
aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else if (BYTES_BIG_ENDIAN)
|
|
|
|
{
|
|
|
|
emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
|
|
|
|
simplify_gen_subreg (OImode, operands[1], CImode, 0));
|
|
|
|
emit_move_insn (gen_lowpart (V16QImode,
|
|
|
|
simplify_gen_subreg (TImode, operands[0],
|
|
|
|
CImode, 32)),
|
|
|
|
gen_lowpart (V16QImode,
|
|
|
|
simplify_gen_subreg (TImode, operands[1],
|
|
|
|
CImode, 32)));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
FAIL;
|
2012-10-23 19:02:30 +02:00
|
|
|
})
|
|
|
|
|
|
|
|
(define_split
|
2015-01-21 18:53:31 +01:00
|
|
|
[(set (match_operand:XI 0 "nonimmediate_operand")
|
|
|
|
(match_operand:XI 1 "general_operand"))]
|
2012-10-23 19:02:30 +02:00
|
|
|
"TARGET_SIMD && reload_completed"
|
2015-01-21 18:53:31 +01:00
|
|
|
[(const_int 0)]
|
2012-10-23 19:02:30 +02:00
|
|
|
{
|
2015-01-21 18:53:31 +01:00
|
|
|
if (register_operand (operands[0], XImode)
|
|
|
|
&& register_operand (operands[1], XImode))
|
|
|
|
{
|
|
|
|
aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else if (BYTES_BIG_ENDIAN)
|
|
|
|
{
|
|
|
|
emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
|
|
|
|
simplify_gen_subreg (OImode, operands[1], XImode, 0));
|
|
|
|
emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
|
|
|
|
simplify_gen_subreg (OImode, operands[1], XImode, 32));
|
|
|
|
DONE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
FAIL;
|
2012-10-23 19:02:30 +02:00
|
|
|
})
|
|
|
|
|
2015-09-15 15:09:45 +02:00
|
|
|
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
|
|
|
|
[(match_operand:VSTRUCT 0 "register_operand" "=w")
|
re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
From-SVN: r216630
2014-10-24 12:53:08 +02:00
|
|
|
(match_operand:DI 1 "register_operand" "w")
|
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2015-09-15 14:48:15 +02:00
|
|
|
rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
|
2015-09-15 15:09:45 +02:00
|
|
|
set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
|
|
|
|
* <VSTRUCT:nregs>);
|
re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
From-SVN: r216630
2014-10-24 12:53:08 +02:00
|
|
|
|
2015-09-15 15:09:45 +02:00
|
|
|
emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
|
|
|
|
mem));
|
re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
From-SVN: r216630
2014-10-24 12:53:08 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(define_insn "aarch64_ld2<mode>_dreg"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:OI 0 "register_operand" "=w")
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD2_DREG))]
|
|
|
|
"TARGET_SIMD"
|
2012-10-23 19:02:30 +02:00
|
|
|
"ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_load2_2reg<q>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(define_insn "aarch64_ld2<mode>_dreg"
|
2016-08-30 17:40:19 +02:00
|
|
|
[(set (match_operand:OI 0 "register_operand" "=w")
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD2_DREG))]
|
|
|
|
"TARGET_SIMD"
|
2016-08-30 17:40:19 +02:00
|
|
|
"ld1\\t{%S0.1d - %T0.1d}, %1"
|
|
|
|
[(set_attr "type" "neon_load1_2reg<q>")]
|
|
|
|
)
|
|
|
|
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(define_insn "aarch64_ld3<mode>_dreg"
|
2016-08-30 17:40:19 +02:00
|
|
|
[(set (match_operand:CI 0 "register_operand" "=w")
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD3_DREG))]
|
|
|
|
"TARGET_SIMD"
|
2016-08-30 17:40:19 +02:00
|
|
|
"ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
|
|
|
|
[(set_attr "type" "neon_load3_3reg<q>")]
|
|
|
|
)
|
|
|
|
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(define_insn "aarch64_ld3<mode>_dreg"
|
2016-08-30 17:40:19 +02:00
|
|
|
[(set (match_operand:CI 0 "register_operand" "=w")
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD3_DREG))]
|
|
|
|
"TARGET_SIMD"
|
2012-10-23 19:02:30 +02:00
|
|
|
"ld1\\t{%S0.1d - %U0.1d}, %1"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_load1_3reg<q>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(define_insn "aarch64_ld4<mode>_dreg"
|
2012-10-23 19:02:30 +02:00
|
|
|
[(set (match_operand:XI 0 "register_operand" "=w")
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD4_DREG))]
|
|
|
|
"TARGET_SIMD"
|
2016-08-30 17:40:19 +02:00
|
|
|
"ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
|
|
|
|
[(set_attr "type" "neon_load4_4reg<q>")]
|
|
|
|
)
|
|
|
|
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(define_insn "aarch64_ld4<mode>_dreg"
|
2016-08-30 17:40:19 +02:00
|
|
|
[(set (match_operand:XI 0 "register_operand" "=w")
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD4_DREG))]
|
|
|
|
"TARGET_SIMD"
|
2012-10-23 19:02:30 +02:00
|
|
|
"ld1\\t{%S0.1d - %V0.1d}, %1"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_load1_4reg<q>")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
|
|
|
|
[(match_operand:VSTRUCT 0 "register_operand" "=w")
|
|
|
|
(match_operand:DI 1 "register_operand" "r")
|
|
|
|
(unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2015-09-15 14:11:27 +02:00
|
|
|
rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
|
|
|
|
set_mem_size (mem, <VSTRUCT:nregs> * 8);
|
2012-10-23 19:02:30 +02:00
|
|
|
|
[AArch64] Remove use of wider vector modes
The AArch64 port defined x2, x3 and x4 vector modes that were only used
in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that
this rtl would have led to any valid simplifications, since the values
involved were unspecs that had a different number of operands from the
non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while
the qreg one had two operands.)
As it happened, the patterns led to invalid simplifications on big-
endian targets due to a mix-up in the operand order, see Tamar's fix
in r240271.
This patch therefore replaces the rtl patterns with dedicated unspecs.
This allows the x2, x3 and x4 modes to be removed, avoiding a clash
with 256-bit and 512-bit SVE.
2017-08-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte
vector modes.
* config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete.
* config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG)
(UNSPEC_LD4_DREG): New unspecs.
* config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le)
(aarch64_ld2<mode>_dreg_be): Replace with...
(aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld3<mode>_dreg_le)
(aarch64_ld3<mode>_dreg_be): Replace with...
(aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG
unspec.
(aarch64_ld4<mode>_dreg_le)
(aarch64_ld4<mode>_dreg_be): Replace with...
(aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG
unspec.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r251555
2017-08-31 11:51:40 +02:00
|
|
|
emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
|
2012-10-23 19:02:30 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
(define_expand "aarch64_ld1<VALL_F16:mode>"
|
|
|
|
[(match_operand:VALL_F16 0 "register_operand")
|
2013-07-03 11:48:02 +02:00
|
|
|
(match_operand:DI 1 "register_operand")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
machine_mode mode = <VALL_F16:MODE>mode;
|
2013-07-03 11:48:02 +02:00
|
|
|
rtx mem = gen_rtx_MEM (mode, operands[1]);
|
2014-01-23 15:46:31 +01:00
|
|
|
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
|
2014-01-23 15:46:31 +01:00
|
|
|
else
|
|
|
|
emit_move_insn (operands[0], mem);
|
2013-07-03 11:48:02 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
|
|
|
|
[(match_operand:VSTRUCT 0 "register_operand" "=w")
|
|
|
|
(match_operand:DI 1 "register_operand" "r")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
decl.c, [...]: Remove redundant enum from machine_mode.
gcc/ada/
* gcc-interface/decl.c, gcc-interface/gigi.h, gcc-interface/misc.c,
gcc-interface/trans.c, gcc-interface/utils.c, gcc-interface/utils2.c:
Remove redundant enum from machine_mode.
gcc/c-family/
* c-common.c, c-common.h, c-cppbuiltin.c, c-lex.c: Remove redundant
enum from machine_mode.
gcc/c/
* c-decl.c, c-tree.h, c-typeck.c: Remove redundant enum from
machine_mode.
gcc/cp/
* constexpr.c: Remove redundant enum from machine_mode.
gcc/fortran/
* trans-types.c, trans-types.h: Remove redundant enum from
machine_mode.
gcc/go/
* go-lang.c: Remove redundant enum from machine_mode.
gcc/java/
* builtins.c, java-tree.h, typeck.c: Remove redundant enum from
machine_mode.
gcc/lto/
* lto-lang.c: Remove redundant enum from machine_mode.
gcc/
* addresses.h, alias.c, asan.c, auto-inc-dec.c, bt-load.c, builtins.c,
builtins.h, caller-save.c, calls.c, calls.h, cfgexpand.c, cfgloop.h,
cfgrtl.c, combine.c, compare-elim.c, config/aarch64/aarch64-builtins.c,
config/aarch64/aarch64-protos.h, config/aarch64/aarch64-simd.md,
config/aarch64/aarch64.c, config/aarch64/aarch64.h,
config/aarch64/aarch64.md, config/alpha/alpha-protos.h,
config/alpha/alpha.c, config/arc/arc-protos.h, config/arc/arc.c,
config/arc/arc.h, config/arc/predicates.md,
config/arm/aarch-common-protos.h, config/arm/aarch-common.c,
config/arm/arm-protos.h, config/arm/arm.c, config/arm/arm.h,
config/arm/arm.md, config/arm/neon.md, config/arm/thumb2.md,
config/avr/avr-log.c, config/avr/avr-protos.h, config/avr/avr.c,
config/avr/avr.md, config/bfin/bfin-protos.h, config/bfin/bfin.c,
config/c6x/c6x-protos.h, config/c6x/c6x.c, config/c6x/c6x.md,
config/cr16/cr16-protos.h, config/cr16/cr16.c,
config/cris/cris-protos.h, config/cris/cris.c, config/cris/cris.md,
config/darwin-protos.h, config/darwin.c,
config/epiphany/epiphany-protos.h, config/epiphany/epiphany.c,
config/epiphany/epiphany.md, config/fr30/fr30.c,
config/frv/frv-protos.h, config/frv/frv.c, config/frv/predicates.md,
config/h8300/h8300-protos.h, config/h8300/h8300.c,
config/i386/i386-builtin-types.awk, config/i386/i386-protos.h,
config/i386/i386.c, config/i386/i386.md, config/i386/predicates.md,
config/i386/sse.md, config/i386/sync.md, config/ia64/ia64-protos.h,
config/ia64/ia64.c, config/iq2000/iq2000-protos.h,
config/iq2000/iq2000.c, config/iq2000/iq2000.md,
config/lm32/lm32-protos.h, config/lm32/lm32.c,
config/m32c/m32c-protos.h, config/m32c/m32c.c,
config/m32r/m32r-protos.h, config/m32r/m32r.c,
config/m68k/m68k-protos.h, config/m68k/m68k.c,
config/mcore/mcore-protos.h, config/mcore/mcore.c,
config/mcore/mcore.md, config/mep/mep-protos.h, config/mep/mep.c,
config/microblaze/microblaze-protos.h, config/microblaze/microblaze.c,
config/mips/mips-protos.h, config/mips/mips.c,
config/mmix/mmix-protos.h, config/mmix/mmix.c,
config/mn10300/mn10300-protos.h, config/mn10300/mn10300.c,
config/moxie/moxie.c, config/msp430/msp430-protos.h,
config/msp430/msp430.c, config/nds32/nds32-cost.c,
config/nds32/nds32-intrinsic.c, config/nds32/nds32-md-auxiliary.c,
config/nds32/nds32-protos.h, config/nds32/nds32.c,
config/nios2/nios2-protos.h, config/nios2/nios2.c,
config/pa/pa-protos.h, config/pa/pa.c, config/pdp11/pdp11-protos.h,
config/pdp11/pdp11.c, config/rl78/rl78-protos.h, config/rl78/rl78.c,
config/rs6000/altivec.md, config/rs6000/rs6000-c.c,
config/rs6000/rs6000-protos.h, config/rs6000/rs6000.c,
config/rs6000/rs6000.h, config/rx/rx-protos.h, config/rx/rx.c,
config/s390/predicates.md, config/s390/s390-protos.h,
config/s390/s390.c, config/s390/s390.h, config/s390/s390.md,
config/sh/predicates.md, config/sh/sh-protos.h, config/sh/sh.c,
config/sh/sh.md, config/sparc/predicates.md,
config/sparc/sparc-protos.h, config/sparc/sparc.c,
config/sparc/sparc.md, config/spu/spu-protos.h, config/spu/spu.c,
config/stormy16/stormy16-protos.h, config/stormy16/stormy16.c,
config/tilegx/tilegx-protos.h, config/tilegx/tilegx.c,
config/tilegx/tilegx.md, config/tilepro/tilepro-protos.h,
config/tilepro/tilepro.c, config/v850/v850-protos.h,
config/v850/v850.c, config/v850/v850.md, config/vax/vax-protos.h,
config/vax/vax.c, config/vms/vms-c.c, config/xtensa/xtensa-protos.h,
config/xtensa/xtensa.c, coverage.c, cprop.c, cse.c, cselib.c, cselib.h,
dbxout.c, ddg.c, df-problems.c, dfp.c, dfp.h, doc/md.texi,
doc/rtl.texi, doc/tm.texi, doc/tm.texi.in, dojump.c, dse.c,
dwarf2cfi.c, dwarf2out.c, dwarf2out.h, emit-rtl.c, emit-rtl.h,
except.c, explow.c, expmed.c, expmed.h, expr.c, expr.h, final.c,
fixed-value.c, fixed-value.h, fold-const.c, function.c, function.h,
fwprop.c, gcse.c, gengenrtl.c, genmodes.c, genopinit.c, genoutput.c,
genpreds.c, genrecog.c, gensupport.c, gimple-ssa-strength-reduction.c,
graphite-clast-to-gimple.c, haifa-sched.c, hooks.c, hooks.h, ifcvt.c,
internal-fn.c, ira-build.c, ira-color.c, ira-conflicts.c, ira-costs.c,
ira-emit.c, ira-int.h, ira-lives.c, ira.c, ira.h, jump.c, langhooks.h,
libfuncs.h, lists.c, loop-doloop.c, loop-invariant.c, loop-iv.c,
loop-unroll.c, lower-subreg.c, lower-subreg.h, lra-assigns.c,
lra-constraints.c, lra-eliminations.c, lra-int.h, lra-lives.c,
lra-spills.c, lra.c, lra.h, machmode.h, omp-low.c, optabs.c, optabs.h,
output.h, postreload.c, print-tree.c, read-rtl.c, real.c, real.h,
recog.c, recog.h, ree.c, reg-stack.c, regcprop.c, reginfo.c,
regrename.c, regs.h, reload.c, reload.h, reload1.c, rtl.c, rtl.h,
rtlanal.c, rtlhash.c, rtlhooks-def.h, rtlhooks.c, sched-deps.c,
sel-sched-dump.c, sel-sched-ir.c, sel-sched-ir.h, sel-sched.c,
simplify-rtx.c, stmt.c, stor-layout.c, stor-layout.h, target.def,
targhooks.c, targhooks.h, tree-affine.c, tree-call-cdce.c,
tree-complex.c, tree-data-ref.c, tree-dfa.c, tree-if-conv.c,
tree-inline.c, tree-outof-ssa.c, tree-scalar-evolution.c,
tree-ssa-address.c, tree-ssa-ccp.c, tree-ssa-loop-ivopts.c,
tree-ssa-loop-ivopts.h, tree-ssa-loop-manip.c,
tree-ssa-loop-prefetch.c, tree-ssa-math-opts.c, tree-ssa-reassoc.c,
tree-ssa-sccvn.c, tree-streamer-in.c, tree-switch-conversion.c,
tree-vect-data-refs.c, tree-vect-generic.c, tree-vect-loop.c,
tree-vect-patterns.c, tree-vect-slp.c, tree-vect-stmts.c,
tree-vrp.c, tree.c, tree.h, tsan.c, ubsan.c, valtrack.c,
var-tracking.c, varasm.c: Remove redundant enum from
machine_mode.
gcc/
* gengtype.c (main): Treat machine_mode as a scalar typedef.
* genmodes.c (emit_insn_modes_h): Hide inline functions if
USED_FOR_TARGET.
From-SVN: r216834
2014-10-29 13:02:45 +01:00
|
|
|
machine_mode mode = <VSTRUCT:MODE>mode;
|
2012-10-23 19:02:30 +02:00
|
|
|
rtx mem = gen_rtx_MEM (mode, operands[1]);
|
|
|
|
|
2015-09-02 16:04:22 +02:00
|
|
|
emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
|
2012-10-23 19:02:30 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2017-12-27 12:47:45 +01:00
|
|
|
(define_expand "aarch64_ld1x2<VQ:mode>"
|
|
|
|
[(match_operand:OI 0 "register_operand" "=w")
|
|
|
|
(match_operand:DI 1 "register_operand" "r")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
machine_mode mode = OImode;
|
|
|
|
rtx mem = gen_rtx_MEM (mode, operands[1]);
|
|
|
|
|
|
|
|
emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_ld1x2<VDC:mode>"
|
|
|
|
[(match_operand:OI 0 "register_operand" "=w")
|
|
|
|
(match_operand:DI 1 "register_operand" "r")
|
|
|
|
(unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
machine_mode mode = OImode;
|
|
|
|
rtx mem = gen_rtx_MEM (mode, operands[1]);
|
|
|
|
|
|
|
|
emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
|
2015-09-15 15:09:45 +02:00
|
|
|
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
|
|
|
|
[(match_operand:VSTRUCT 0 "register_operand" "=w")
|
2014-10-24 16:58:51 +02:00
|
|
|
(match_operand:DI 1 "register_operand" "w")
|
2015-09-15 15:09:45 +02:00
|
|
|
(match_operand:VSTRUCT 2 "register_operand" "0")
|
2014-10-24 16:58:51 +02:00
|
|
|
(match_operand:SI 3 "immediate_operand" "i")
|
2015-07-22 12:44:16 +02:00
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
2014-10-24 16:58:51 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2015-09-15 14:28:20 +02:00
|
|
|
rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
|
2015-09-15 15:09:45 +02:00
|
|
|
set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
|
|
|
|
* <VSTRUCT:nregs>);
|
|
|
|
|
|
|
|
aarch64_simd_lane_bounds (operands[3], 0,
|
|
|
|
GET_MODE_NUNITS (<VALLDIF:MODE>mode),
|
|
|
|
NULL);
|
|
|
|
emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
|
|
|
|
operands[0], mem, operands[2], operands[3]));
|
2014-10-24 16:58:51 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
;; Expanders for builtins to extract vector registers from large
|
|
|
|
;; opaque integer modes.
|
|
|
|
|
|
|
|
;; D-register list.
|
|
|
|
|
|
|
|
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
|
|
|
|
[(match_operand:VDC 0 "register_operand" "=w")
|
|
|
|
(match_operand:VSTRUCT 1 "register_operand" "w")
|
|
|
|
(match_operand:SI 2 "immediate_operand" "i")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
int part = INTVAL (operands[2]);
|
|
|
|
rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
|
|
|
|
int offset = part * 16;
|
|
|
|
|
|
|
|
emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
|
|
|
|
emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; Q-register list.
|
|
|
|
|
|
|
|
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
|
|
|
|
[(match_operand:VQ 0 "register_operand" "=w")
|
|
|
|
(match_operand:VSTRUCT 1 "register_operand" "w")
|
|
|
|
(match_operand:SI 2 "immediate_operand" "i")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
int part = INTVAL (operands[2]);
|
|
|
|
int offset = part * 16;
|
|
|
|
|
|
|
|
emit_move_insn (operands[0],
|
|
|
|
gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
;; Permuted-store expanders for neon intrinsics.
|
|
|
|
|
2012-12-05 12:36:00 +01:00
|
|
|
;; Permute instructions
|
|
|
|
|
|
|
|
;; vec_perm support
|
|
|
|
|
|
|
|
(define_expand "vec_perm<mode>"
|
|
|
|
[(match_operand:VB 0 "register_operand")
|
|
|
|
(match_operand:VB 1 "register_operand")
|
|
|
|
(match_operand:VB 2 "register_operand")
|
|
|
|
(match_operand:VB 3 "register_operand")]
|
2014-06-30 13:07:58 +02:00
|
|
|
"TARGET_SIMD"
|
2012-12-05 12:36:00 +01:00
|
|
|
{
|
|
|
|
aarch64_expand_vec_perm (operands[0], operands[1],
|
2017-11-06 21:02:49 +01:00
|
|
|
operands[2], operands[3], <nunits>);
|
2012-12-05 12:36:00 +01:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_insn "aarch64_tbl1<mode>"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
|
|
|
|
(match_operand:VB 2 "register_operand" "w")]
|
|
|
|
UNSPEC_TBL))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_tbl1<q>")]
|
2012-12-05 12:36:00 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
;; Two source registers.
|
|
|
|
|
|
|
|
(define_insn "aarch64_tbl2v16qi"
|
|
|
|
[(set (match_operand:V16QI 0 "register_operand" "=w")
|
|
|
|
(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
|
|
|
|
(match_operand:V16QI 2 "register_operand" "w")]
|
|
|
|
UNSPEC_TBL))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_tbl2_q")]
|
2012-12-05 12:36:00 +01:00
|
|
|
)
|
|
|
|
|
[AArch64] Fix vqtb[lx][234] on big-endian
2015-11-06 Christophe Lyon <christophe.lyon@linaro.org>
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins
tables: add tbl3v16qi, qtbl[34]*, tbx4v16qi, qtbx[34]*.
* config/aarch64/aarch64-simd.md (aarch64_tbl3v8qi): Rename to...
(aarch64_tbl3<mode>) ... this, which supports v16qi too.
(aarch64_tbx4v8qi): Rename to...
aarch64_tbx4<mode>): ... this.
(aarch64_qtbl3<mode>): New pattern.
(aarch64_qtbx3<mode>): New pattern.
(aarch64_qtbl4<mode>): New pattern.
(aarch64_qtbx4<mode>): New pattern.
* config/aarch64/arm_neon.h (vqtbl2_s8, vqtbl2_u8, vqtbl2_p8)
(vqtbl2q_s8, vqtbl2q_u8, vqtbl2q_p8, vqtbl3_s8, vqtbl3_u8)
(vqtbl3_p8, vqtbl3q_s8, vqtbl3q_u8, vqtbl3q_p8, vqtbl4_s8)
(vqtbl4_u8, vqtbl4_p8, vqtbl4q_s8, vqtbl4q_u8, vqtbl4q_p8)
(vqtbx2_s8, vqtbx2_u8, vqtbx2_p8, vqtbx2q_s8, vqtbx2q_u8)
(vqtbx2q_p8, vqtbx3_s8, vqtbx3_u8, vqtbx3_p8, vqtbx3q_s8)
(vqtbx3q_u8, vqtbx3q_p8, vqtbx4_s8, vqtbx4_u8, vqtbx4_p8)
(vqtbx4q_s8, vqtbx4q_u8, vqtbx4q_p8): Rewrite using builtin
functions.
gcc/testsuite/
* gcc.target/aarch64/advsimd-intrinsics/vqtbX.c: New test.
From-SVN: r229886
2015-11-06 21:34:12 +01:00
|
|
|
(define_insn "aarch64_tbl3<mode>"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(unspec:VB [(match_operand:OI 1 "register_operand" "w")
|
|
|
|
(match_operand:VB 2 "register_operand" "w")]
|
2015-10-12 16:06:54 +02:00
|
|
|
UNSPEC_TBL))]
|
|
|
|
"TARGET_SIMD"
|
[AArch64] Fix vqtb[lx][234] on big-endian
2015-11-06 Christophe Lyon <christophe.lyon@linaro.org>
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins
tables: add tbl3v16qi, qtbl[34]*, tbx4v16qi, qtbx[34]*.
* config/aarch64/aarch64-simd.md (aarch64_tbl3v8qi): Rename to...
(aarch64_tbl3<mode>) ... this, which supports v16qi too.
(aarch64_tbx4v8qi): Rename to...
aarch64_tbx4<mode>): ... this.
(aarch64_qtbl3<mode>): New pattern.
(aarch64_qtbx3<mode>): New pattern.
(aarch64_qtbl4<mode>): New pattern.
(aarch64_qtbx4<mode>): New pattern.
* config/aarch64/arm_neon.h (vqtbl2_s8, vqtbl2_u8, vqtbl2_p8)
(vqtbl2q_s8, vqtbl2q_u8, vqtbl2q_p8, vqtbl3_s8, vqtbl3_u8)
(vqtbl3_p8, vqtbl3q_s8, vqtbl3q_u8, vqtbl3q_p8, vqtbl4_s8)
(vqtbl4_u8, vqtbl4_p8, vqtbl4q_s8, vqtbl4q_u8, vqtbl4q_p8)
(vqtbx2_s8, vqtbx2_u8, vqtbx2_p8, vqtbx2q_s8, vqtbx2q_u8)
(vqtbx2q_p8, vqtbx3_s8, vqtbx3_u8, vqtbx3_p8, vqtbx3q_s8)
(vqtbx3q_u8, vqtbx3q_p8, vqtbx4_s8, vqtbx4_u8, vqtbx4_p8)
(vqtbx4q_s8, vqtbx4q_u8, vqtbx4q_p8): Rewrite using builtin
functions.
gcc/testsuite/
* gcc.target/aarch64/advsimd-intrinsics/vqtbX.c: New test.
From-SVN: r229886
2015-11-06 21:34:12 +01:00
|
|
|
"tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
|
2015-10-12 16:06:54 +02:00
|
|
|
[(set_attr "type" "neon_tbl3")]
|
|
|
|
)
|
|
|
|
|
[AArch64] Fix vqtb[lx][234] on big-endian
2015-11-06 Christophe Lyon <christophe.lyon@linaro.org>
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins
tables: add tbl3v16qi, qtbl[34]*, tbx4v16qi, qtbx[34]*.
* config/aarch64/aarch64-simd.md (aarch64_tbl3v8qi): Rename to...
(aarch64_tbl3<mode>) ... this, which supports v16qi too.
(aarch64_tbx4v8qi): Rename to...
aarch64_tbx4<mode>): ... this.
(aarch64_qtbl3<mode>): New pattern.
(aarch64_qtbx3<mode>): New pattern.
(aarch64_qtbl4<mode>): New pattern.
(aarch64_qtbx4<mode>): New pattern.
* config/aarch64/arm_neon.h (vqtbl2_s8, vqtbl2_u8, vqtbl2_p8)
(vqtbl2q_s8, vqtbl2q_u8, vqtbl2q_p8, vqtbl3_s8, vqtbl3_u8)
(vqtbl3_p8, vqtbl3q_s8, vqtbl3q_u8, vqtbl3q_p8, vqtbl4_s8)
(vqtbl4_u8, vqtbl4_p8, vqtbl4q_s8, vqtbl4q_u8, vqtbl4q_p8)
(vqtbx2_s8, vqtbx2_u8, vqtbx2_p8, vqtbx2q_s8, vqtbx2q_u8)
(vqtbx2q_p8, vqtbx3_s8, vqtbx3_u8, vqtbx3_p8, vqtbx3q_s8)
(vqtbx3q_u8, vqtbx3q_p8, vqtbx4_s8, vqtbx4_u8, vqtbx4_p8)
(vqtbx4q_s8, vqtbx4q_u8, vqtbx4q_p8): Rewrite using builtin
functions.
gcc/testsuite/
* gcc.target/aarch64/advsimd-intrinsics/vqtbX.c: New test.
From-SVN: r229886
2015-11-06 21:34:12 +01:00
|
|
|
(define_insn "aarch64_tbx4<mode>"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(unspec:VB [(match_operand:VB 1 "register_operand" "0")
|
2015-10-12 16:06:54 +02:00
|
|
|
(match_operand:OI 2 "register_operand" "w")
|
[AArch64] Fix vqtb[lx][234] on big-endian
2015-11-06 Christophe Lyon <christophe.lyon@linaro.org>
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins
tables: add tbl3v16qi, qtbl[34]*, tbx4v16qi, qtbx[34]*.
* config/aarch64/aarch64-simd.md (aarch64_tbl3v8qi): Rename to...
(aarch64_tbl3<mode>) ... this, which supports v16qi too.
(aarch64_tbx4v8qi): Rename to...
aarch64_tbx4<mode>): ... this.
(aarch64_qtbl3<mode>): New pattern.
(aarch64_qtbx3<mode>): New pattern.
(aarch64_qtbl4<mode>): New pattern.
(aarch64_qtbx4<mode>): New pattern.
* config/aarch64/arm_neon.h (vqtbl2_s8, vqtbl2_u8, vqtbl2_p8)
(vqtbl2q_s8, vqtbl2q_u8, vqtbl2q_p8, vqtbl3_s8, vqtbl3_u8)
(vqtbl3_p8, vqtbl3q_s8, vqtbl3q_u8, vqtbl3q_p8, vqtbl4_s8)
(vqtbl4_u8, vqtbl4_p8, vqtbl4q_s8, vqtbl4q_u8, vqtbl4q_p8)
(vqtbx2_s8, vqtbx2_u8, vqtbx2_p8, vqtbx2q_s8, vqtbx2q_u8)
(vqtbx2q_p8, vqtbx3_s8, vqtbx3_u8, vqtbx3_p8, vqtbx3q_s8)
(vqtbx3q_u8, vqtbx3q_p8, vqtbx4_s8, vqtbx4_u8, vqtbx4_p8)
(vqtbx4q_s8, vqtbx4q_u8, vqtbx4q_p8): Rewrite using builtin
functions.
gcc/testsuite/
* gcc.target/aarch64/advsimd-intrinsics/vqtbX.c: New test.
From-SVN: r229886
2015-11-06 21:34:12 +01:00
|
|
|
(match_operand:VB 3 "register_operand" "w")]
|
|
|
|
UNSPEC_TBX))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
|
|
|
|
[(set_attr "type" "neon_tbl4")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; Three source registers.
|
|
|
|
|
|
|
|
(define_insn "aarch64_qtbl3<mode>"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(unspec:VB [(match_operand:CI 1 "register_operand" "w")
|
|
|
|
(match_operand:VB 2 "register_operand" "w")]
|
|
|
|
UNSPEC_TBL))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
|
|
|
|
[(set_attr "type" "neon_tbl3")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_qtbx3<mode>"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(unspec:VB [(match_operand:VB 1 "register_operand" "0")
|
|
|
|
(match_operand:CI 2 "register_operand" "w")
|
|
|
|
(match_operand:VB 3 "register_operand" "w")]
|
|
|
|
UNSPEC_TBX))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
|
|
|
|
[(set_attr "type" "neon_tbl3")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; Four source registers.
|
|
|
|
|
|
|
|
(define_insn "aarch64_qtbl4<mode>"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(unspec:VB [(match_operand:XI 1 "register_operand" "w")
|
|
|
|
(match_operand:VB 2 "register_operand" "w")]
|
|
|
|
UNSPEC_TBL))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
|
|
|
|
[(set_attr "type" "neon_tbl4")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_qtbx4<mode>"
|
|
|
|
[(set (match_operand:VB 0 "register_operand" "=w")
|
|
|
|
(unspec:VB [(match_operand:VB 1 "register_operand" "0")
|
|
|
|
(match_operand:XI 2 "register_operand" "w")
|
|
|
|
(match_operand:VB 3 "register_operand" "w")]
|
2015-10-12 16:06:54 +02:00
|
|
|
UNSPEC_TBX))]
|
|
|
|
"TARGET_SIMD"
|
[AArch64] Fix vqtb[lx][234] on big-endian
2015-11-06 Christophe Lyon <christophe.lyon@linaro.org>
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins
tables: add tbl3v16qi, qtbl[34]*, tbx4v16qi, qtbx[34]*.
* config/aarch64/aarch64-simd.md (aarch64_tbl3v8qi): Rename to...
(aarch64_tbl3<mode>) ... this, which supports v16qi too.
(aarch64_tbx4v8qi): Rename to...
aarch64_tbx4<mode>): ... this.
(aarch64_qtbl3<mode>): New pattern.
(aarch64_qtbx3<mode>): New pattern.
(aarch64_qtbl4<mode>): New pattern.
(aarch64_qtbx4<mode>): New pattern.
* config/aarch64/arm_neon.h (vqtbl2_s8, vqtbl2_u8, vqtbl2_p8)
(vqtbl2q_s8, vqtbl2q_u8, vqtbl2q_p8, vqtbl3_s8, vqtbl3_u8)
(vqtbl3_p8, vqtbl3q_s8, vqtbl3q_u8, vqtbl3q_p8, vqtbl4_s8)
(vqtbl4_u8, vqtbl4_p8, vqtbl4q_s8, vqtbl4q_u8, vqtbl4q_p8)
(vqtbx2_s8, vqtbx2_u8, vqtbx2_p8, vqtbx2q_s8, vqtbx2q_u8)
(vqtbx2q_p8, vqtbx3_s8, vqtbx3_u8, vqtbx3_p8, vqtbx3q_s8)
(vqtbx3q_u8, vqtbx3q_p8, vqtbx4_s8, vqtbx4_u8, vqtbx4_p8)
(vqtbx4q_s8, vqtbx4q_u8, vqtbx4q_p8): Rewrite using builtin
functions.
gcc/testsuite/
* gcc.target/aarch64/advsimd-intrinsics/vqtbX.c: New test.
From-SVN: r229886
2015-11-06 21:34:12 +01:00
|
|
|
"tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
|
2015-10-12 16:06:54 +02:00
|
|
|
[(set_attr "type" "neon_tbl4")]
|
|
|
|
)
|
|
|
|
|
2012-12-05 12:36:00 +01:00
|
|
|
(define_insn_and_split "aarch64_combinev16qi"
|
|
|
|
[(set (match_operand:OI 0 "register_operand" "=w")
|
|
|
|
(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
|
|
|
|
(match_operand:V16QI 2 "register_operand" "w")]
|
|
|
|
UNSPEC_CONCAT))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"#"
|
|
|
|
"&& reload_completed"
|
|
|
|
[(const_int 0)]
|
|
|
|
{
|
|
|
|
aarch64_split_combinev16qi (operands);
|
|
|
|
DONE;
|
2013-10-15 17:30:00 +02:00
|
|
|
}
|
|
|
|
[(set_attr "type" "multiple")]
|
|
|
|
)
|
2012-12-05 12:36:00 +01:00
|
|
|
|
2017-11-01 21:40:04 +01:00
|
|
|
;; This instruction's pattern is generated directly by
|
|
|
|
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
|
|
|
|
;; need corresponding changes there.
|
2012-12-05 12:42:37 +01:00
|
|
|
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
|
2016-07-25 16:02:42 +02:00
|
|
|
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
|
|
|
(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
|
|
|
|
(match_operand:VALL_F16 2 "register_operand" "w")]
|
|
|
|
PERMUTE))]
|
2012-12-05 12:42:37 +01:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_permute<q>")]
|
2012-12-05 12:42:37 +01:00
|
|
|
)
|
|
|
|
|
2017-11-01 21:40:04 +01:00
|
|
|
;; This instruction's pattern is generated directly by
|
|
|
|
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
|
|
|
|
;; need corresponding changes there. Note that the immediate (third)
|
|
|
|
;; operand is a lane index not a byte index.
|
Detect EXT patterns to vec_perm_const, use for EXT intrinsics
* config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
TYPES_BINOPV): New static data.
* config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
* config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
New patterns.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
patterns for EXT.
(aarch64_evpc_ext): New function.
* config/aarch64/iterators.md (UNSPEC_EXT): New enum element.
* config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.
From-SVN: r211058
2014-05-29 18:57:42 +02:00
|
|
|
(define_insn "aarch64_ext<mode>"
|
2016-07-25 16:02:42 +02:00
|
|
|
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
|
|
|
(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
|
|
|
|
(match_operand:VALL_F16 2 "register_operand" "w")
|
|
|
|
(match_operand:SI 3 "immediate_operand" "i")]
|
|
|
|
UNSPEC_EXT))]
|
Detect EXT patterns to vec_perm_const, use for EXT intrinsics
* config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
TYPES_BINOPV): New static data.
* config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
* config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
New patterns.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
patterns for EXT.
(aarch64_evpc_ext): New function.
* config/aarch64/iterators.md (UNSPEC_EXT): New enum element.
* config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.
From-SVN: r211058
2014-05-29 18:57:42 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
operands[3] = GEN_INT (INTVAL (operands[3])
|
2015-07-30 15:06:25 +02:00
|
|
|
* GET_MODE_UNIT_SIZE (<MODE>mode));
|
Detect EXT patterns to vec_perm_const, use for EXT intrinsics
* config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
TYPES_BINOPV): New static data.
* config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
* config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
New patterns.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
patterns for EXT.
(aarch64_evpc_ext): New function.
* config/aarch64/iterators.md (UNSPEC_EXT): New enum element.
* config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.
From-SVN: r211058
2014-05-29 18:57:42 +02:00
|
|
|
return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
|
|
|
|
}
|
|
|
|
[(set_attr "type" "neon_ext<q>")]
|
|
|
|
)
|
|
|
|
|
2017-11-01 21:40:04 +01:00
|
|
|
;; This instruction's pattern is generated directly by
|
|
|
|
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
|
|
|
|
;; need corresponding changes there.
|
Recognize shuffle patterns for REV instructions on AArch64, rewrite intrinsics.
* config/aarch64/aarch64-simd.md (aarch64_rev<REVERSE:rev-op><mode>):
New pattern.
* config/aarch64/aarch64.c (aarch64_evpc_rev): New function.
(aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev.
* config/aarch64/iterators.md (REVERSE): New iterator.
(UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements.
(rev_op): New int_attribute.
* config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8,
vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8,
vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8,
vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8,
vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16,
vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8,
vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32):
Replace temporary __asm__ with __builtin_shuffle.
From-SVN: r211174
2014-06-03 13:28:55 +02:00
|
|
|
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
|
2016-07-25 16:02:42 +02:00
|
|
|
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
|
|
|
(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
|
Recognize shuffle patterns for REV instructions on AArch64, rewrite intrinsics.
* config/aarch64/aarch64-simd.md (aarch64_rev<REVERSE:rev-op><mode>):
New pattern.
* config/aarch64/aarch64.c (aarch64_evpc_rev): New function.
(aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev.
* config/aarch64/iterators.md (REVERSE): New iterator.
(UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements.
(rev_op): New int_attribute.
* config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8,
vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8,
vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8,
vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8,
vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16,
vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8,
vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32):
Replace temporary __asm__ with __builtin_shuffle.
From-SVN: r211174
2014-06-03 13:28:55 +02:00
|
|
|
REVERSE))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
|
|
|
|
[(set_attr "type" "neon_rev<q>")]
|
|
|
|
)
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
(define_insn "aarch64_st2<mode>_dreg"
|
2015-09-15 14:11:27 +02:00
|
|
|
[(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST2))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_store2_2reg")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
(define_insn "aarch64_st2<mode>_dreg"
|
2015-09-15 14:11:27 +02:00
|
|
|
[(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST2))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st1\\t{%S1.1d - %T1.1d}, %0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_store1_2reg")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
(define_insn "aarch64_st3<mode>_dreg"
|
2015-09-15 14:11:27 +02:00
|
|
|
[(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST3))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_store3_3reg")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
(define_insn "aarch64_st3<mode>_dreg"
|
2015-09-15 14:11:27 +02:00
|
|
|
[(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST3))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st1\\t{%S1.1d - %U1.1d}, %0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_store1_3reg")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
(define_insn "aarch64_st4<mode>_dreg"
|
2015-09-15 14:11:27 +02:00
|
|
|
[(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST4))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_store4_4reg")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
(define_insn "aarch64_st4<mode>_dreg"
|
2015-09-15 14:11:27 +02:00
|
|
|
[(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
|
|
|
|
(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
|
2012-10-23 19:02:30 +02:00
|
|
|
(unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_ST4))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"st1\\t{%S1.1d - %V1.1d}, %0"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_store1_4reg")]
|
|
|
|
)
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
|
|
|
|
[(match_operand:DI 0 "register_operand" "r")
|
|
|
|
(match_operand:VSTRUCT 1 "register_operand" "w")
|
|
|
|
(unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2015-09-15 14:11:27 +02:00
|
|
|
rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
|
|
|
|
set_mem_size (mem, <VSTRUCT:nregs> * 8);
|
2012-10-23 19:02:30 +02:00
|
|
|
|
|
|
|
emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
|
|
|
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
|
|
|
|
[(match_operand:DI 0 "register_operand" "r")
|
|
|
|
(match_operand:VSTRUCT 1 "register_operand" "w")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
decl.c, [...]: Remove redundant enum from machine_mode.
gcc/ada/
* gcc-interface/decl.c, gcc-interface/gigi.h, gcc-interface/misc.c,
gcc-interface/trans.c, gcc-interface/utils.c, gcc-interface/utils2.c:
Remove redundant enum from machine_mode.
gcc/c-family/
* c-common.c, c-common.h, c-cppbuiltin.c, c-lex.c: Remove redundant
enum from machine_mode.
gcc/c/
* c-decl.c, c-tree.h, c-typeck.c: Remove redundant enum from
machine_mode.
gcc/cp/
* constexpr.c: Remove redundant enum from machine_mode.
gcc/fortran/
* trans-types.c, trans-types.h: Remove redundant enum from
machine_mode.
gcc/go/
* go-lang.c: Remove redundant enum from machine_mode.
gcc/java/
* builtins.c, java-tree.h, typeck.c: Remove redundant enum from
machine_mode.
gcc/lto/
* lto-lang.c: Remove redundant enum from machine_mode.
gcc/
* addresses.h, alias.c, asan.c, auto-inc-dec.c, bt-load.c, builtins.c,
builtins.h, caller-save.c, calls.c, calls.h, cfgexpand.c, cfgloop.h,
cfgrtl.c, combine.c, compare-elim.c, config/aarch64/aarch64-builtins.c,
config/aarch64/aarch64-protos.h, config/aarch64/aarch64-simd.md,
config/aarch64/aarch64.c, config/aarch64/aarch64.h,
config/aarch64/aarch64.md, config/alpha/alpha-protos.h,
config/alpha/alpha.c, config/arc/arc-protos.h, config/arc/arc.c,
config/arc/arc.h, config/arc/predicates.md,
config/arm/aarch-common-protos.h, config/arm/aarch-common.c,
config/arm/arm-protos.h, config/arm/arm.c, config/arm/arm.h,
config/arm/arm.md, config/arm/neon.md, config/arm/thumb2.md,
config/avr/avr-log.c, config/avr/avr-protos.h, config/avr/avr.c,
config/avr/avr.md, config/bfin/bfin-protos.h, config/bfin/bfin.c,
config/c6x/c6x-protos.h, config/c6x/c6x.c, config/c6x/c6x.md,
config/cr16/cr16-protos.h, config/cr16/cr16.c,
config/cris/cris-protos.h, config/cris/cris.c, config/cris/cris.md,
config/darwin-protos.h, config/darwin.c,
config/epiphany/epiphany-protos.h, config/epiphany/epiphany.c,
config/epiphany/epiphany.md, config/fr30/fr30.c,
config/frv/frv-protos.h, config/frv/frv.c, config/frv/predicates.md,
config/h8300/h8300-protos.h, config/h8300/h8300.c,
config/i386/i386-builtin-types.awk, config/i386/i386-protos.h,
config/i386/i386.c, config/i386/i386.md, config/i386/predicates.md,
config/i386/sse.md, config/i386/sync.md, config/ia64/ia64-protos.h,
config/ia64/ia64.c, config/iq2000/iq2000-protos.h,
config/iq2000/iq2000.c, config/iq2000/iq2000.md,
config/lm32/lm32-protos.h, config/lm32/lm32.c,
config/m32c/m32c-protos.h, config/m32c/m32c.c,
config/m32r/m32r-protos.h, config/m32r/m32r.c,
config/m68k/m68k-protos.h, config/m68k/m68k.c,
config/mcore/mcore-protos.h, config/mcore/mcore.c,
config/mcore/mcore.md, config/mep/mep-protos.h, config/mep/mep.c,
config/microblaze/microblaze-protos.h, config/microblaze/microblaze.c,
config/mips/mips-protos.h, config/mips/mips.c,
config/mmix/mmix-protos.h, config/mmix/mmix.c,
config/mn10300/mn10300-protos.h, config/mn10300/mn10300.c,
config/moxie/moxie.c, config/msp430/msp430-protos.h,
config/msp430/msp430.c, config/nds32/nds32-cost.c,
config/nds32/nds32-intrinsic.c, config/nds32/nds32-md-auxiliary.c,
config/nds32/nds32-protos.h, config/nds32/nds32.c,
config/nios2/nios2-protos.h, config/nios2/nios2.c,
config/pa/pa-protos.h, config/pa/pa.c, config/pdp11/pdp11-protos.h,
config/pdp11/pdp11.c, config/rl78/rl78-protos.h, config/rl78/rl78.c,
config/rs6000/altivec.md, config/rs6000/rs6000-c.c,
config/rs6000/rs6000-protos.h, config/rs6000/rs6000.c,
config/rs6000/rs6000.h, config/rx/rx-protos.h, config/rx/rx.c,
config/s390/predicates.md, config/s390/s390-protos.h,
config/s390/s390.c, config/s390/s390.h, config/s390/s390.md,
config/sh/predicates.md, config/sh/sh-protos.h, config/sh/sh.c,
config/sh/sh.md, config/sparc/predicates.md,
config/sparc/sparc-protos.h, config/sparc/sparc.c,
config/sparc/sparc.md, config/spu/spu-protos.h, config/spu/spu.c,
config/stormy16/stormy16-protos.h, config/stormy16/stormy16.c,
config/tilegx/tilegx-protos.h, config/tilegx/tilegx.c,
config/tilegx/tilegx.md, config/tilepro/tilepro-protos.h,
config/tilepro/tilepro.c, config/v850/v850-protos.h,
config/v850/v850.c, config/v850/v850.md, config/vax/vax-protos.h,
config/vax/vax.c, config/vms/vms-c.c, config/xtensa/xtensa-protos.h,
config/xtensa/xtensa.c, coverage.c, cprop.c, cse.c, cselib.c, cselib.h,
dbxout.c, ddg.c, df-problems.c, dfp.c, dfp.h, doc/md.texi,
doc/rtl.texi, doc/tm.texi, doc/tm.texi.in, dojump.c, dse.c,
dwarf2cfi.c, dwarf2out.c, dwarf2out.h, emit-rtl.c, emit-rtl.h,
except.c, explow.c, expmed.c, expmed.h, expr.c, expr.h, final.c,
fixed-value.c, fixed-value.h, fold-const.c, function.c, function.h,
fwprop.c, gcse.c, gengenrtl.c, genmodes.c, genopinit.c, genoutput.c,
genpreds.c, genrecog.c, gensupport.c, gimple-ssa-strength-reduction.c,
graphite-clast-to-gimple.c, haifa-sched.c, hooks.c, hooks.h, ifcvt.c,
internal-fn.c, ira-build.c, ira-color.c, ira-conflicts.c, ira-costs.c,
ira-emit.c, ira-int.h, ira-lives.c, ira.c, ira.h, jump.c, langhooks.h,
libfuncs.h, lists.c, loop-doloop.c, loop-invariant.c, loop-iv.c,
loop-unroll.c, lower-subreg.c, lower-subreg.h, lra-assigns.c,
lra-constraints.c, lra-eliminations.c, lra-int.h, lra-lives.c,
lra-spills.c, lra.c, lra.h, machmode.h, omp-low.c, optabs.c, optabs.h,
output.h, postreload.c, print-tree.c, read-rtl.c, real.c, real.h,
recog.c, recog.h, ree.c, reg-stack.c, regcprop.c, reginfo.c,
regrename.c, regs.h, reload.c, reload.h, reload1.c, rtl.c, rtl.h,
rtlanal.c, rtlhash.c, rtlhooks-def.h, rtlhooks.c, sched-deps.c,
sel-sched-dump.c, sel-sched-ir.c, sel-sched-ir.h, sel-sched.c,
simplify-rtx.c, stmt.c, stor-layout.c, stor-layout.h, target.def,
targhooks.c, targhooks.h, tree-affine.c, tree-call-cdce.c,
tree-complex.c, tree-data-ref.c, tree-dfa.c, tree-if-conv.c,
tree-inline.c, tree-outof-ssa.c, tree-scalar-evolution.c,
tree-ssa-address.c, tree-ssa-ccp.c, tree-ssa-loop-ivopts.c,
tree-ssa-loop-ivopts.h, tree-ssa-loop-manip.c,
tree-ssa-loop-prefetch.c, tree-ssa-math-opts.c, tree-ssa-reassoc.c,
tree-ssa-sccvn.c, tree-streamer-in.c, tree-switch-conversion.c,
tree-vect-data-refs.c, tree-vect-generic.c, tree-vect-loop.c,
tree-vect-patterns.c, tree-vect-slp.c, tree-vect-stmts.c,
tree-vrp.c, tree.c, tree.h, tsan.c, ubsan.c, valtrack.c,
var-tracking.c, varasm.c: Remove redundant enum from
machine_mode.
gcc/
* gengtype.c (main): Treat machine_mode as a scalar typedef.
* genmodes.c (emit_insn_modes_h): Hide inline functions if
USED_FOR_TARGET.
From-SVN: r216834
2014-10-29 13:02:45 +01:00
|
|
|
machine_mode mode = <VSTRUCT:MODE>mode;
|
2012-10-23 19:02:30 +02:00
|
|
|
rtx mem = gen_rtx_MEM (mode, operands[0]);
|
|
|
|
|
2015-09-02 16:04:22 +02:00
|
|
|
emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
|
2012-10-23 19:02:30 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2015-09-15 15:09:45 +02:00
|
|
|
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
|
2014-04-28 23:05:51 +02:00
|
|
|
[(match_operand:DI 0 "register_operand" "r")
|
2015-09-15 15:09:45 +02:00
|
|
|
(match_operand:VSTRUCT 1 "register_operand" "w")
|
2015-07-22 12:44:16 +02:00
|
|
|
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
|
2014-04-28 23:05:51 +02:00
|
|
|
(match_operand:SI 2 "immediate_operand")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
2015-09-15 14:43:07 +02:00
|
|
|
rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
|
2015-09-15 15:09:45 +02:00
|
|
|
set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
|
|
|
|
* <VSTRUCT:nregs>);
|
2014-04-28 23:05:51 +02:00
|
|
|
|
2015-09-15 15:09:45 +02:00
|
|
|
emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
|
|
|
|
mem, operands[1], operands[2]));
|
2014-04-28 23:05:51 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
(define_expand "aarch64_st1<VALL_F16:mode>"
|
2013-07-03 11:48:02 +02:00
|
|
|
[(match_operand:DI 0 "register_operand")
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
(match_operand:VALL_F16 1 "register_operand")]
|
2013-07-03 11:48:02 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
machine_mode mode = <VALL_F16:MODE>mode;
|
2013-07-03 11:48:02 +02:00
|
|
|
rtx mem = gen_rtx_MEM (mode, operands[0]);
|
2014-01-23 15:46:31 +01:00
|
|
|
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
[AArch64] Add support for float16x{4,8}_t vectors/builtins
gcc/:
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
V4HFmode and V8HFmode.
(aarch64_split_simd_move): Add case for V8HFmode.
* config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
(aarch64_simd_builtin_std_type): Handle HFmode.
(aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
* config/aarch64/aarch64-simd.md (mov<mode>, aarch64_get_lane<mode>,
aarch64_ld1<VALL:mode>, aarch64_st1<VALL:mode): Use VALL_F16 iterator.
(aarch64_be_ld1<mode>, aarch64_be_st1<mode>): Use VALLDI_F16 iterator.
* config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
Float16x8_t.
* config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
* config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
New typedefs.
(vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
vst1q_lane_f16): New.
* config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
(VALLDI_F16, VALL_F16): New.
(Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
Add cases for V4HF and V8HF.
(VDBL, VRL2, VRL3, VRL4): Add V4HF case.
gcc/testsuite/:
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
float16x8_t.
* gcc.target/aarch64/vset_lane_1.c: Likewise.
* gcc.target/aarch64/vld1-vst1_1.c: Likewise.
* gcc.target/aarch64/vld1_lane.c: Likewise.
From-SVN: r227542
2015-09-08 20:57:31 +02:00
|
|
|
emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
|
2014-01-23 15:46:31 +01:00
|
|
|
else
|
|
|
|
emit_move_insn (mem, operands[1]);
|
2013-07-03 11:48:02 +02:00
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2012-10-23 19:02:30 +02:00
|
|
|
;; Expander for builtins to insert vector registers into large
|
|
|
|
;; opaque integer modes.
|
|
|
|
|
|
|
|
;; Q-register list. We don't need a D-reg inserter as we zero
|
|
|
|
;; extend them in arm_neon.h and insert the resulting Q-regs.
|
|
|
|
|
|
|
|
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
|
|
|
|
[(match_operand:VSTRUCT 0 "register_operand" "+w")
|
|
|
|
(match_operand:VSTRUCT 1 "register_operand" "0")
|
|
|
|
(match_operand:VQ 2 "register_operand" "w")
|
|
|
|
(match_operand:SI 3 "immediate_operand" "i")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
int part = INTVAL (operands[3]);
|
|
|
|
int offset = part * 16;
|
|
|
|
|
|
|
|
emit_move_insn (operands[0], operands[1]);
|
|
|
|
emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
|
|
|
|
operands[2]);
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
re PR target/80846 (auto-vectorized AVX2 horizontal sum should narrow to 128b right away, to be more efficient for Ryzen and Intel)
PR target/80846
* optabs.def (vec_extract_optab, vec_init_optab): Change from
a direct optab to conversion optab.
* optabs.c (expand_vector_broadcast): Use convert_optab_handler
with GET_MODE_INNER as last argument instead of optab_handler.
* expmed.c (extract_bit_field_1): Likewise. Use vector from
vector extraction if possible and optab is available.
* expr.c (store_constructor): Use convert_optab_handler instead
of optab_handler. Use vector initialization from smaller
vectors if possible and optab is available.
* tree-vect-stmts.c (vectorizable_load): Likewise.
* doc/md.texi (vec_extract, vec_init): Document that the optabs
now have two modes.
* config/i386/i386.c (ix86_expand_vector_init): Handle expansion
of vec_init from half-sized vectors with the same element mode.
* config/i386/sse.md (ssehalfvecmode): Add V4TI case.
(ssehalfvecmodelower, ssescalarmodelower): New mode attributes.
(reduc_plus_scal_v8df, reduc_plus_scal_v4df, reduc_plus_scal_v2df,
reduc_plus_scal_v16sf, reduc_plus_scal_v8sf, reduc_plus_scal_v4sf,
reduc_<code>_scal_<mode>, reduc_umin_scal_v8hi): Add element mode
after mode in gen_vec_extract* calls.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><ssescalarmodelower>): ... this.
(vec_extract<mode><ssehalfvecmodelower>): New expander.
(rotl<mode>3, rotr<mode>3, <shift_insn><mode>3, ashrv2di3): Add
element mode after mode in gen_vec_init* calls.
(VEC_INIT_HALF_MODE): New mode iterator.
(vec_init<mode>): Renamed to ...
(vec_init<mode><ssescalarmodelower>): ... this.
(vec_init<mode><ssehalfvecmodelower>): New expander.
* config/i386/mmx.md (vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
(vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2si): Renamed to ...
(vec_extractv2sisi): ... this.
(vec_initv2si): Renamed to ...
(vec_initv2sisi): ... this.
(vec_extractv4hi): Renamed to ...
(vec_extractv4hihi): ... this.
(vec_initv4hi): Renamed to ...
(vec_initv4hihi): ... this.
(vec_extractv8qi): Renamed to ...
(vec_extractv8qiqi): ... this.
(vec_initv8qi): Renamed to ...
(vec_initv8qiqi): ... this.
* config/rs6000/vector.md (VEC_base_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><VEC_base_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><VEC_base_l>): ... this.
* config/rs6000/paired.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
* config/rs6000/altivec.md (splitter, altivec_copysign_v4sf3,
vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi,
vec_unpacku_lo_v8hi, mulv16qi3, altivec_vreve<mode>2): Add
element mode after mode in gen_vec_init* calls.
* config/aarch64/aarch64-simd.md (vec_init<mode>): Renamed to ...
(vec_init<mode><Vel>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><Vel>): ... this.
* config/aarch64/iterators.md (Vel): New mode attribute.
* config/s390/s390.c (s390_expand_vec_strlen, s390_expand_vec_movstr):
Add element mode after mode in gen_vec_extract* calls.
* config/s390/vector.md (non_vec_l): New mode attribute.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><non_vec_l>): ... this.
(vec_init<mode>): Renamed to ...
(vec_init<mode><non_vec_l>): ... this.
* config/s390/s390-builtins.def (s390_vlgvb, s390_vlgvh, s390_vlgvf,
s390_vlgvf_flt, s390_vlgvg, s390_vlgvg_dbl): Add element mode after
vec_extract mode.
* config/arm/iterators.md (V_elem_l): New mode attribute.
* config/arm/neon.md (vec_extract<mode>): Renamed to ...
(vec_extract<mode><V_elem_l>): ... this.
(vec_extractv2di): Renamed to ...
(vec_extractv2didi): ... this.
(vec_init<mode>): Renamed to ...
(vec_init<mode><V_elem_l>): ... this.
(reduc_plus_scal_<mode>, reduc_plus_scal_v2di, reduc_smin_scal_<mode>,
reduc_smax_scal_<mode>, reduc_umin_scal_<mode>,
reduc_umax_scal_<mode>, neon_vget_lane<mode>, neon_vget_laneu<mode>):
Add element mode after gen_vec_extract* calls.
* config/mips/mips-msa.md (vec_init<mode>): Renamed to ...
(vec_init<mode><unitmode>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><unitmode>): ... this.
* config/mips/loongson.md (vec_init<mode>): Renamed to ...
(vec_init<mode><unitmode>): ... this.
* config/mips/mips-ps-3d.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
(reduc_plus_scal_v2sf, reduc_smin_scal_v2sf, reduc_smax_scal_v2sf):
Add element mode after gen_vec_extract* calls.
* config/mips/mips.md (unitmode): New mode iterator.
* config/spu/spu.c (spu_expand_prologue, spu_allocate_stack,
spu_builtin_extract): Add element mode after gen_vec_extract* calls.
* config/spu/spu.md (inner_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><inner_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><inner_l>): ... this.
* config/sparc/sparc.md (veltmode): New mode iterator.
(vec_init<VMALL:mode>): Renamed to ...
(vec_init<VMALL:mode><VMALL:veltmode>): ... this.
* config/ia64/vect.md (vec_initv2si): Renamed to ...
(vec_initv2sisi): ... this.
(vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
* config/powerpcspe/vector.md (VEC_base_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><VEC_base_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><VEC_base_l>): ... this.
* config/powerpcspe/paired.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
* config/powerpcspe/altivec.md (splitter, altivec_copysign_v4sf3,
vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi,
vec_unpacku_lo_v8hi, mulv16qi3): Add element mode after mode in
gen_vec_init* calls.
From-SVN: r250759
2017-08-01 10:26:14 +02:00
|
|
|
;; Standard pattern name vec_init<mode><Vel>.
|
2013-01-08 18:19:57 +01:00
|
|
|
|
re PR target/80846 (auto-vectorized AVX2 horizontal sum should narrow to 128b right away, to be more efficient for Ryzen and Intel)
PR target/80846
* optabs.def (vec_extract_optab, vec_init_optab): Change from
a direct optab to conversion optab.
* optabs.c (expand_vector_broadcast): Use convert_optab_handler
with GET_MODE_INNER as last argument instead of optab_handler.
* expmed.c (extract_bit_field_1): Likewise. Use vector from
vector extraction if possible and optab is available.
* expr.c (store_constructor): Use convert_optab_handler instead
of optab_handler. Use vector initialization from smaller
vectors if possible and optab is available.
* tree-vect-stmts.c (vectorizable_load): Likewise.
* doc/md.texi (vec_extract, vec_init): Document that the optabs
now have two modes.
* config/i386/i386.c (ix86_expand_vector_init): Handle expansion
of vec_init from half-sized vectors with the same element mode.
* config/i386/sse.md (ssehalfvecmode): Add V4TI case.
(ssehalfvecmodelower, ssescalarmodelower): New mode attributes.
(reduc_plus_scal_v8df, reduc_plus_scal_v4df, reduc_plus_scal_v2df,
reduc_plus_scal_v16sf, reduc_plus_scal_v8sf, reduc_plus_scal_v4sf,
reduc_<code>_scal_<mode>, reduc_umin_scal_v8hi): Add element mode
after mode in gen_vec_extract* calls.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><ssescalarmodelower>): ... this.
(vec_extract<mode><ssehalfvecmodelower>): New expander.
(rotl<mode>3, rotr<mode>3, <shift_insn><mode>3, ashrv2di3): Add
element mode after mode in gen_vec_init* calls.
(VEC_INIT_HALF_MODE): New mode iterator.
(vec_init<mode>): Renamed to ...
(vec_init<mode><ssescalarmodelower>): ... this.
(vec_init<mode><ssehalfvecmodelower>): New expander.
* config/i386/mmx.md (vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
(vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2si): Renamed to ...
(vec_extractv2sisi): ... this.
(vec_initv2si): Renamed to ...
(vec_initv2sisi): ... this.
(vec_extractv4hi): Renamed to ...
(vec_extractv4hihi): ... this.
(vec_initv4hi): Renamed to ...
(vec_initv4hihi): ... this.
(vec_extractv8qi): Renamed to ...
(vec_extractv8qiqi): ... this.
(vec_initv8qi): Renamed to ...
(vec_initv8qiqi): ... this.
* config/rs6000/vector.md (VEC_base_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><VEC_base_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><VEC_base_l>): ... this.
* config/rs6000/paired.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
* config/rs6000/altivec.md (splitter, altivec_copysign_v4sf3,
vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi,
vec_unpacku_lo_v8hi, mulv16qi3, altivec_vreve<mode>2): Add
element mode after mode in gen_vec_init* calls.
* config/aarch64/aarch64-simd.md (vec_init<mode>): Renamed to ...
(vec_init<mode><Vel>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><Vel>): ... this.
* config/aarch64/iterators.md (Vel): New mode attribute.
* config/s390/s390.c (s390_expand_vec_strlen, s390_expand_vec_movstr):
Add element mode after mode in gen_vec_extract* calls.
* config/s390/vector.md (non_vec_l): New mode attribute.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><non_vec_l>): ... this.
(vec_init<mode>): Renamed to ...
(vec_init<mode><non_vec_l>): ... this.
* config/s390/s390-builtins.def (s390_vlgvb, s390_vlgvh, s390_vlgvf,
s390_vlgvf_flt, s390_vlgvg, s390_vlgvg_dbl): Add element mode after
vec_extract mode.
* config/arm/iterators.md (V_elem_l): New mode attribute.
* config/arm/neon.md (vec_extract<mode>): Renamed to ...
(vec_extract<mode><V_elem_l>): ... this.
(vec_extractv2di): Renamed to ...
(vec_extractv2didi): ... this.
(vec_init<mode>): Renamed to ...
(vec_init<mode><V_elem_l>): ... this.
(reduc_plus_scal_<mode>, reduc_plus_scal_v2di, reduc_smin_scal_<mode>,
reduc_smax_scal_<mode>, reduc_umin_scal_<mode>,
reduc_umax_scal_<mode>, neon_vget_lane<mode>, neon_vget_laneu<mode>):
Add element mode after gen_vec_extract* calls.
* config/mips/mips-msa.md (vec_init<mode>): Renamed to ...
(vec_init<mode><unitmode>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><unitmode>): ... this.
* config/mips/loongson.md (vec_init<mode>): Renamed to ...
(vec_init<mode><unitmode>): ... this.
* config/mips/mips-ps-3d.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
(reduc_plus_scal_v2sf, reduc_smin_scal_v2sf, reduc_smax_scal_v2sf):
Add element mode after gen_vec_extract* calls.
* config/mips/mips.md (unitmode): New mode iterator.
* config/spu/spu.c (spu_expand_prologue, spu_allocate_stack,
spu_builtin_extract): Add element mode after gen_vec_extract* calls.
* config/spu/spu.md (inner_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><inner_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><inner_l>): ... this.
* config/sparc/sparc.md (veltmode): New mode iterator.
(vec_init<VMALL:mode>): Renamed to ...
(vec_init<VMALL:mode><VMALL:veltmode>): ... this.
* config/ia64/vect.md (vec_initv2si): Renamed to ...
(vec_initv2sisi): ... this.
(vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
* config/powerpcspe/vector.md (VEC_base_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><VEC_base_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><VEC_base_l>): ... this.
* config/powerpcspe/paired.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
* config/powerpcspe/altivec.md (splitter, altivec_copysign_v4sf3,
vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi,
vec_unpacku_lo_v8hi, mulv16qi3): Add element mode after mode in
gen_vec_init* calls.
From-SVN: r250759
2017-08-01 10:26:14 +02:00
|
|
|
(define_expand "vec_init<mode><Vel>"
|
2015-09-08 21:18:29 +02:00
|
|
|
[(match_operand:VALL_F16 0 "register_operand" "")
|
2013-01-08 18:19:57 +01:00
|
|
|
(match_operand 1 "" "")]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
{
|
|
|
|
aarch64_expand_vector_init (operands[0], operands[1]);
|
|
|
|
DONE;
|
|
|
|
})
|
|
|
|
|
2013-01-14 18:48:52 +01:00
|
|
|
(define_insn "*aarch64_simd_ld1r<mode>"
|
2015-09-08 21:18:29 +02:00
|
|
|
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
|
|
|
(vec_duplicate:VALL_F16
|
2013-01-14 18:48:52 +01:00
|
|
|
(match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld1r\\t{%0.<Vtype>}, %1"
|
2013-11-15 18:05:37 +01:00
|
|
|
[(set_attr "type" "neon_load1_all_lanes")]
|
|
|
|
)
|
2013-04-22 14:36:52 +02:00
|
|
|
|
2017-12-27 12:47:45 +01:00
|
|
|
(define_insn "aarch64_simd_ld1<mode>_x2"
|
|
|
|
[(set (match_operand:OI 0 "register_operand" "=w")
|
|
|
|
(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD1))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
|
|
|
|
[(set_attr "type" "neon_load1_2reg<q>")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_simd_ld1<mode>_x2"
|
|
|
|
[(set (match_operand:OI 0 "register_operand" "=w")
|
|
|
|
(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
|
|
|
|
(unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
|
|
|
UNSPEC_LD1))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
|
|
|
|
[(set_attr "type" "neon_load1_2reg<q>")]
|
|
|
|
)
|
|
|
|
|
|
|
|
|
2013-04-22 14:36:52 +02:00
|
|
|
(define_insn "aarch64_frecpe<mode>"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
|
|
|
|
UNSPEC_FRECPE))]
|
2013-04-22 14:36:52 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"frecpe\\t%0.<Vtype>, %1.<Vtype>"
|
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics
gcc/
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
From-SVN: r238716
2016-07-25 16:20:37 +02:00
|
|
|
[(set_attr "type" "neon_fp_recpe_<stype><q>")]
|
2013-04-22 14:36:52 +02:00
|
|
|
)
|
|
|
|
|
2013-09-05 17:53:37 +02:00
|
|
|
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
|
[AArch64][7/10] ARMv8.2-A FP16 one operand scalar intrinsics
gcc/
* config.gcc (aarch64*-*-*): Install arm_fp16.h.
* config/aarch64/aarch64-builtins.c (hi_UP): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_frsqrte<mode>): Extend to HF
mode.
(aarch64_frecp<FRECP:frecp_suffix><mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.md (<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(abs<mode>2): Likewise.
(<optab><mode>hf2): New pattern for HF mode.
(<optab>hihf2): Likewise.
* config/aarch64/arm_neon.h: Include arm_fp16.h.
* config/aarch64/iterators.md (GPF_F16, GPI_F16, VHSDF_HSDF): New.
(w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE):
Support HF mode.
* config/aarch64/arm_fp16.h: New file.
(vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16,
vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16,
vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16,
vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16,
vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16,
vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16,
vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16,
vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16,
vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16,
vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16,
vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16,
vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16,
vsqrth_f16): New.
From-SVN: r238722
2016-07-25 18:00:28 +02:00
|
|
|
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
|
|
|
|
(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
|
|
|
|
FRECP))]
|
2013-09-05 17:53:37 +02:00
|
|
|
"TARGET_SIMD"
|
|
|
|
"frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
|
[AArch64][7/10] ARMv8.2-A FP16 one operand scalar intrinsics
gcc/
* config.gcc (aarch64*-*-*): Install arm_fp16.h.
* config/aarch64/aarch64-builtins.c (hi_UP): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_frsqrte<mode>): Extend to HF
mode.
(aarch64_frecp<FRECP:frecp_suffix><mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.md (<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(abs<mode>2): Likewise.
(<optab><mode>hf2): New pattern for HF mode.
(<optab>hihf2): Likewise.
* config/aarch64/arm_neon.h: Include arm_fp16.h.
* config/aarch64/iterators.md (GPF_F16, GPI_F16, VHSDF_HSDF): New.
(w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE):
Support HF mode.
* config/aarch64/arm_fp16.h: New file.
(vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16,
vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16,
vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16,
vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16,
vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16,
vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16,
vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16,
vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16,
vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16,
vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16,
vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16,
vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16,
vsqrth_f16): New.
From-SVN: r238722
2016-07-25 18:00:28 +02:00
|
|
|
[(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
|
2013-09-05 17:53:37 +02:00
|
|
|
)
|
|
|
|
|
2013-04-22 14:36:52 +02:00
|
|
|
(define_insn "aarch64_frecps<mode>"
|
[AArch64][8/10] ARMv8.2-A FP16 two operands scalar intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3):
New.
(<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise.
(add<mode>3): Likewise.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
(<fmaxmin><mode>3): Extend to HF.
* config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_fac<optab><mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn>hfhi3): New.
(<FCVT_FIXED2F:fcvt_fixed_insn>hihf3): Likewise.
* config/aarch64/iterators.md (VHSDF_SDF): Delete.
(VSDQ_HSDI): Support HI.
(fcvt_target, FCVT_TARGET): Likewise.
* config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16,
vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16,
vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32,
vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64,
vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16,
vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16,
vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16,
vrsqrtsh_f16): New.
From-SVN: r238723
2016-07-25 18:10:52 +02:00
|
|
|
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
|
|
|
|
(unspec:VHSDF_HSDF
|
|
|
|
[(match_operand:VHSDF_HSDF 1 "register_operand" "w")
|
|
|
|
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
UNSPEC_FRECPS))]
|
2013-04-22 14:36:52 +02:00
|
|
|
"TARGET_SIMD"
|
2013-09-05 17:53:37 +02:00
|
|
|
"frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
2016-07-25 16:30:52 +02:00
|
|
|
[(set_attr "type" "neon_fp_recps_<stype><q>")]
|
2013-04-22 14:36:52 +02:00
|
|
|
)
|
|
|
|
|
arm_neon.h (vrecpe_u32, [...]): Rewrite using builtin functions.
* config/aarch64/arm_neon.h (vrecpe_u32, vrecpeq_u32): Rewrite using
builtin functions.
(vfma_f32, vfmaq_f32, vfmaq_f64, vfma_n_f32, vfmaq_n_f32, vfmaq_n_f64,
vfms_f32, vfmsq_f32, vfmsq_f64): Likewise.
(vhsub_s8, vhsub_u8, vhsub_s16, vhsub_u16, vhsub_s32, vhsub_u32,
vhsubq_s8, vhsubq_u8, vhsubq_s16, vhsubq_u16, vhsubq_s32, vhsubq_u32,
vsubhn_s16, vsubhn_u16, vsubhn_s32, vsubhn_u32, vsubhn_s64, vsubhn_u66,
vrsubhn_s16, vrsubhn_u16, vrsubhn_s32, vrsubhn_u32, vrsubhn_s64,
vrsubhn_u64, vsubhn_high_s16, vsubhn_high_u16, vsubhn_high_s32,
vsubhn_high_u32, vsubhn_high_s64, vsubhn_high_u64, vrsubhn_high_s16,
vrsubhn_high_u16, vrsubhn_high_s32, vrsubhn_high_u32, vrsubhn_high_s64,
vrsubhn_high_u64): Likewise.
* config/aarch64/iterators.md (VDQ_SI): New mode iterator.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add UNSPEC_URECPE.
* config/aarch64/aarch64-simd.md (aarch64_urecpe<mode>): New pattern.
* config/aarch64/aarch64-simd-builtins.def (shsub, uhsub, subhn, rsubhn,
subhn2, rsubhn2, urecpe): New builtins.
Co-Authored-By: Haijian Zhang <z.zhanghaijian@huawei.com>
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
Co-Authored-By: Pengfei Sui <suipengfei@huawei.com>
From-SVN: r218484
2014-12-08 15:19:44 +01:00
|
|
|
(define_insn "aarch64_urecpe<mode>"
|
|
|
|
[(set (match_operand:VDQ_SI 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
|
|
|
|
UNSPEC_URECPE))]
|
|
|
|
"TARGET_SIMD"
|
|
|
|
"urecpe\\t%0.<Vtype>, %1.<Vtype>"
|
|
|
|
[(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
|
|
|
|
|
re PR target/80846 (auto-vectorized AVX2 horizontal sum should narrow to 128b right away, to be more efficient for Ryzen and Intel)
PR target/80846
* optabs.def (vec_extract_optab, vec_init_optab): Change from
a direct optab to conversion optab.
* optabs.c (expand_vector_broadcast): Use convert_optab_handler
with GET_MODE_INNER as last argument instead of optab_handler.
* expmed.c (extract_bit_field_1): Likewise. Use vector from
vector extraction if possible and optab is available.
* expr.c (store_constructor): Use convert_optab_handler instead
of optab_handler. Use vector initialization from smaller
vectors if possible and optab is available.
* tree-vect-stmts.c (vectorizable_load): Likewise.
* doc/md.texi (vec_extract, vec_init): Document that the optabs
now have two modes.
* config/i386/i386.c (ix86_expand_vector_init): Handle expansion
of vec_init from half-sized vectors with the same element mode.
* config/i386/sse.md (ssehalfvecmode): Add V4TI case.
(ssehalfvecmodelower, ssescalarmodelower): New mode attributes.
(reduc_plus_scal_v8df, reduc_plus_scal_v4df, reduc_plus_scal_v2df,
reduc_plus_scal_v16sf, reduc_plus_scal_v8sf, reduc_plus_scal_v4sf,
reduc_<code>_scal_<mode>, reduc_umin_scal_v8hi): Add element mode
after mode in gen_vec_extract* calls.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><ssescalarmodelower>): ... this.
(vec_extract<mode><ssehalfvecmodelower>): New expander.
(rotl<mode>3, rotr<mode>3, <shift_insn><mode>3, ashrv2di3): Add
element mode after mode in gen_vec_init* calls.
(VEC_INIT_HALF_MODE): New mode iterator.
(vec_init<mode>): Renamed to ...
(vec_init<mode><ssescalarmodelower>): ... this.
(vec_init<mode><ssehalfvecmodelower>): New expander.
* config/i386/mmx.md (vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
(vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2si): Renamed to ...
(vec_extractv2sisi): ... this.
(vec_initv2si): Renamed to ...
(vec_initv2sisi): ... this.
(vec_extractv4hi): Renamed to ...
(vec_extractv4hihi): ... this.
(vec_initv4hi): Renamed to ...
(vec_initv4hihi): ... this.
(vec_extractv8qi): Renamed to ...
(vec_extractv8qiqi): ... this.
(vec_initv8qi): Renamed to ...
(vec_initv8qiqi): ... this.
* config/rs6000/vector.md (VEC_base_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><VEC_base_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><VEC_base_l>): ... this.
* config/rs6000/paired.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
* config/rs6000/altivec.md (splitter, altivec_copysign_v4sf3,
vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi,
vec_unpacku_lo_v8hi, mulv16qi3, altivec_vreve<mode>2): Add
element mode after mode in gen_vec_init* calls.
* config/aarch64/aarch64-simd.md (vec_init<mode>): Renamed to ...
(vec_init<mode><Vel>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><Vel>): ... this.
* config/aarch64/iterators.md (Vel): New mode attribute.
* config/s390/s390.c (s390_expand_vec_strlen, s390_expand_vec_movstr):
Add element mode after mode in gen_vec_extract* calls.
* config/s390/vector.md (non_vec_l): New mode attribute.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><non_vec_l>): ... this.
(vec_init<mode>): Renamed to ...
(vec_init<mode><non_vec_l>): ... this.
* config/s390/s390-builtins.def (s390_vlgvb, s390_vlgvh, s390_vlgvf,
s390_vlgvf_flt, s390_vlgvg, s390_vlgvg_dbl): Add element mode after
vec_extract mode.
* config/arm/iterators.md (V_elem_l): New mode attribute.
* config/arm/neon.md (vec_extract<mode>): Renamed to ...
(vec_extract<mode><V_elem_l>): ... this.
(vec_extractv2di): Renamed to ...
(vec_extractv2didi): ... this.
(vec_init<mode>): Renamed to ...
(vec_init<mode><V_elem_l>): ... this.
(reduc_plus_scal_<mode>, reduc_plus_scal_v2di, reduc_smin_scal_<mode>,
reduc_smax_scal_<mode>, reduc_umin_scal_<mode>,
reduc_umax_scal_<mode>, neon_vget_lane<mode>, neon_vget_laneu<mode>):
Add element mode after gen_vec_extract* calls.
* config/mips/mips-msa.md (vec_init<mode>): Renamed to ...
(vec_init<mode><unitmode>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><unitmode>): ... this.
* config/mips/loongson.md (vec_init<mode>): Renamed to ...
(vec_init<mode><unitmode>): ... this.
* config/mips/mips-ps-3d.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
(reduc_plus_scal_v2sf, reduc_smin_scal_v2sf, reduc_smax_scal_v2sf):
Add element mode after gen_vec_extract* calls.
* config/mips/mips.md (unitmode): New mode iterator.
* config/spu/spu.c (spu_expand_prologue, spu_allocate_stack,
spu_builtin_extract): Add element mode after gen_vec_extract* calls.
* config/spu/spu.md (inner_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><inner_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><inner_l>): ... this.
* config/sparc/sparc.md (veltmode): New mode iterator.
(vec_init<VMALL:mode>): Renamed to ...
(vec_init<VMALL:mode><VMALL:veltmode>): ... this.
* config/ia64/vect.md (vec_initv2si): Renamed to ...
(vec_initv2sisi): ... this.
(vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
* config/powerpcspe/vector.md (VEC_base_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><VEC_base_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><VEC_base_l>): ... this.
* config/powerpcspe/paired.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
* config/powerpcspe/altivec.md (splitter, altivec_copysign_v4sf3,
vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi,
vec_unpacku_lo_v8hi, mulv16qi3): Add element mode after mode in
gen_vec_init* calls.
From-SVN: r250759
2017-08-01 10:26:14 +02:00
|
|
|
;; Standard pattern name vec_extract<mode><Vel>.
|
2013-11-13 16:07:27 +01:00
|
|
|
|
re PR target/80846 (auto-vectorized AVX2 horizontal sum should narrow to 128b right away, to be more efficient for Ryzen and Intel)
PR target/80846
* optabs.def (vec_extract_optab, vec_init_optab): Change from
a direct optab to conversion optab.
* optabs.c (expand_vector_broadcast): Use convert_optab_handler
with GET_MODE_INNER as last argument instead of optab_handler.
* expmed.c (extract_bit_field_1): Likewise. Use vector from
vector extraction if possible and optab is available.
* expr.c (store_constructor): Use convert_optab_handler instead
of optab_handler. Use vector initialization from smaller
vectors if possible and optab is available.
* tree-vect-stmts.c (vectorizable_load): Likewise.
* doc/md.texi (vec_extract, vec_init): Document that the optabs
now have two modes.
* config/i386/i386.c (ix86_expand_vector_init): Handle expansion
of vec_init from half-sized vectors with the same element mode.
* config/i386/sse.md (ssehalfvecmode): Add V4TI case.
(ssehalfvecmodelower, ssescalarmodelower): New mode attributes.
(reduc_plus_scal_v8df, reduc_plus_scal_v4df, reduc_plus_scal_v2df,
reduc_plus_scal_v16sf, reduc_plus_scal_v8sf, reduc_plus_scal_v4sf,
reduc_<code>_scal_<mode>, reduc_umin_scal_v8hi): Add element mode
after mode in gen_vec_extract* calls.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><ssescalarmodelower>): ... this.
(vec_extract<mode><ssehalfvecmodelower>): New expander.
(rotl<mode>3, rotr<mode>3, <shift_insn><mode>3, ashrv2di3): Add
element mode after mode in gen_vec_init* calls.
(VEC_INIT_HALF_MODE): New mode iterator.
(vec_init<mode>): Renamed to ...
(vec_init<mode><ssescalarmodelower>): ... this.
(vec_init<mode><ssehalfvecmodelower>): New expander.
* config/i386/mmx.md (vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
(vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2si): Renamed to ...
(vec_extractv2sisi): ... this.
(vec_initv2si): Renamed to ...
(vec_initv2sisi): ... this.
(vec_extractv4hi): Renamed to ...
(vec_extractv4hihi): ... this.
(vec_initv4hi): Renamed to ...
(vec_initv4hihi): ... this.
(vec_extractv8qi): Renamed to ...
(vec_extractv8qiqi): ... this.
(vec_initv8qi): Renamed to ...
(vec_initv8qiqi): ... this.
* config/rs6000/vector.md (VEC_base_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><VEC_base_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><VEC_base_l>): ... this.
* config/rs6000/paired.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
* config/rs6000/altivec.md (splitter, altivec_copysign_v4sf3,
vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi,
vec_unpacku_lo_v8hi, mulv16qi3, altivec_vreve<mode>2): Add
element mode after mode in gen_vec_init* calls.
* config/aarch64/aarch64-simd.md (vec_init<mode>): Renamed to ...
(vec_init<mode><Vel>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><Vel>): ... this.
* config/aarch64/iterators.md (Vel): New mode attribute.
* config/s390/s390.c (s390_expand_vec_strlen, s390_expand_vec_movstr):
Add element mode after mode in gen_vec_extract* calls.
* config/s390/vector.md (non_vec_l): New mode attribute.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><non_vec_l>): ... this.
(vec_init<mode>): Renamed to ...
(vec_init<mode><non_vec_l>): ... this.
* config/s390/s390-builtins.def (s390_vlgvb, s390_vlgvh, s390_vlgvf,
s390_vlgvf_flt, s390_vlgvg, s390_vlgvg_dbl): Add element mode after
vec_extract mode.
* config/arm/iterators.md (V_elem_l): New mode attribute.
* config/arm/neon.md (vec_extract<mode>): Renamed to ...
(vec_extract<mode><V_elem_l>): ... this.
(vec_extractv2di): Renamed to ...
(vec_extractv2didi): ... this.
(vec_init<mode>): Renamed to ...
(vec_init<mode><V_elem_l>): ... this.
(reduc_plus_scal_<mode>, reduc_plus_scal_v2di, reduc_smin_scal_<mode>,
reduc_smax_scal_<mode>, reduc_umin_scal_<mode>,
reduc_umax_scal_<mode>, neon_vget_lane<mode>, neon_vget_laneu<mode>):
Add element mode after gen_vec_extract* calls.
* config/mips/mips-msa.md (vec_init<mode>): Renamed to ...
(vec_init<mode><unitmode>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><unitmode>): ... this.
* config/mips/loongson.md (vec_init<mode>): Renamed to ...
(vec_init<mode><unitmode>): ... this.
* config/mips/mips-ps-3d.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
(reduc_plus_scal_v2sf, reduc_smin_scal_v2sf, reduc_smax_scal_v2sf):
Add element mode after gen_vec_extract* calls.
* config/mips/mips.md (unitmode): New mode iterator.
* config/spu/spu.c (spu_expand_prologue, spu_allocate_stack,
spu_builtin_extract): Add element mode after gen_vec_extract* calls.
* config/spu/spu.md (inner_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><inner_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><inner_l>): ... this.
* config/sparc/sparc.md (veltmode): New mode iterator.
(vec_init<VMALL:mode>): Renamed to ...
(vec_init<VMALL:mode><VMALL:veltmode>): ... this.
* config/ia64/vect.md (vec_initv2si): Renamed to ...
(vec_initv2sisi): ... this.
(vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
(vec_extractv2sf): Renamed to ...
(vec_extractv2sfsf): ... this.
* config/powerpcspe/vector.md (VEC_base_l): New mode attribute.
(vec_init<mode>): Renamed to ...
(vec_init<mode><VEC_base_l>): ... this.
(vec_extract<mode>): Renamed to ...
(vec_extract<mode><VEC_base_l>): ... this.
* config/powerpcspe/paired.md (vec_initv2sf): Renamed to ...
(vec_initv2sfsf): ... this.
* config/powerpcspe/altivec.md (splitter, altivec_copysign_v4sf3,
vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi,
vec_unpacku_lo_v8hi, mulv16qi3): Add element mode after mode in
gen_vec_init* calls.
From-SVN: r250759
2017-08-01 10:26:14 +02:00
|
|
|
(define_expand "vec_extract<mode><Vel>"
|
2013-11-22 16:29:19 +01:00
|
|
|
[(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
|
2015-09-08 21:18:29 +02:00
|
|
|
(match_operand:VALL_F16 1 "register_operand" "")
|
2013-11-22 16:29:19 +01:00
|
|
|
(match_operand:SI 2 "immediate_operand" "")]
|
2013-11-13 16:07:27 +01:00
|
|
|
"TARGET_SIMD"
|
2013-11-22 16:29:19 +01:00
|
|
|
{
|
|
|
|
emit_insn
|
|
|
|
(gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
|
|
|
|
DONE;
|
|
|
|
})
|
Implement support for AArch64 Crypto AES.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_binopu_qualifiers,
TYPES_BINOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi,
aarch64_crypto_aes<aesmc_op>v16qi): New.
* config/aarch64/arm_neon.h (vaeseq_u8, vaesdq_u8, vaesmcq_u8,
vaesimcq_u8): New.
* config/aarch64/iterators.md (UNSPEC_AESE, UNSPEC_AESD, UNSPEC_AESMC,
UNSPEC_AESIMC): New.
(CRYPTO_AES, CRYPTO_AESMC): New int iterators.
(aes_op, aesmc_op): New int attributes.
testsuite/
* gcc.target/aarch64/aes_1.c: New.
From-SVN: r206117
2013-12-19 15:51:28 +01:00
|
|
|
|
|
|
|
;; aes
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
|
|
|
|
[(set (match_operand:V16QI 0 "register_operand" "=w")
|
|
|
|
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
|
|
|
|
(match_operand:V16QI 2 "register_operand" "w")]
|
|
|
|
CRYPTO_AES))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_AES"
|
Implement support for AArch64 Crypto AES.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_binopu_qualifiers,
TYPES_BINOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi,
aarch64_crypto_aes<aesmc_op>v16qi): New.
* config/aarch64/arm_neon.h (vaeseq_u8, vaesdq_u8, vaesmcq_u8,
vaesimcq_u8): New.
* config/aarch64/iterators.md (UNSPEC_AESE, UNSPEC_AESD, UNSPEC_AESMC,
UNSPEC_AESIMC): New.
(CRYPTO_AES, CRYPTO_AESMC): New int iterators.
(aes_op, aesmc_op): New int attributes.
testsuite/
* gcc.target/aarch64/aes_1.c: New.
From-SVN: r206117
2013-12-19 15:51:28 +01:00
|
|
|
"aes<aes_op>\\t%0.16b, %2.16b"
|
2014-03-28 18:22:47 +01:00
|
|
|
[(set_attr "type" "crypto_aese")]
|
Implement support for AArch64 Crypto AES.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_binopu_qualifiers,
TYPES_BINOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi,
aarch64_crypto_aes<aesmc_op>v16qi): New.
* config/aarch64/arm_neon.h (vaeseq_u8, vaesdq_u8, vaesmcq_u8,
vaesimcq_u8): New.
* config/aarch64/iterators.md (UNSPEC_AESE, UNSPEC_AESD, UNSPEC_AESMC,
UNSPEC_AESIMC): New.
(CRYPTO_AES, CRYPTO_AESMC): New int iterators.
(aes_op, aesmc_op): New int attributes.
testsuite/
* gcc.target/aarch64/aes_1.c: New.
From-SVN: r206117
2013-12-19 15:51:28 +01:00
|
|
|
)
|
|
|
|
|
2016-05-27 15:44:57 +02:00
|
|
|
;; When AES/AESMC fusion is enabled we want the register allocation to
|
|
|
|
;; look like:
|
|
|
|
;; AESE Vn, _
|
|
|
|
;; AESMC Vn, Vn
|
|
|
|
;; So prefer to tie operand 1 to operand 0 when fusing.
|
|
|
|
|
Implement support for AArch64 Crypto AES.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_binopu_qualifiers,
TYPES_BINOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi,
aarch64_crypto_aes<aesmc_op>v16qi): New.
* config/aarch64/arm_neon.h (vaeseq_u8, vaesdq_u8, vaesmcq_u8,
vaesimcq_u8): New.
* config/aarch64/iterators.md (UNSPEC_AESE, UNSPEC_AESD, UNSPEC_AESMC,
UNSPEC_AESIMC): New.
(CRYPTO_AES, CRYPTO_AESMC): New int iterators.
(aes_op, aesmc_op): New int attributes.
testsuite/
* gcc.target/aarch64/aes_1.c: New.
From-SVN: r206117
2013-12-19 15:51:28 +01:00
|
|
|
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
|
2016-05-27 15:44:57 +02:00
|
|
|
[(set (match_operand:V16QI 0 "register_operand" "=w,w")
|
|
|
|
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
|
Implement support for AArch64 Crypto AES.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_binopu_qualifiers,
TYPES_BINOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi,
aarch64_crypto_aes<aesmc_op>v16qi): New.
* config/aarch64/arm_neon.h (vaeseq_u8, vaesdq_u8, vaesmcq_u8,
vaesimcq_u8): New.
* config/aarch64/iterators.md (UNSPEC_AESE, UNSPEC_AESD, UNSPEC_AESMC,
UNSPEC_AESIMC): New.
(CRYPTO_AES, CRYPTO_AESMC): New int iterators.
(aes_op, aesmc_op): New int attributes.
testsuite/
* gcc.target/aarch64/aes_1.c: New.
From-SVN: r206117
2013-12-19 15:51:28 +01:00
|
|
|
CRYPTO_AESMC))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_AES"
|
Implement support for AArch64 Crypto AES.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_binopu_qualifiers,
TYPES_BINOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi,
aarch64_crypto_aes<aesmc_op>v16qi): New.
* config/aarch64/arm_neon.h (vaeseq_u8, vaesdq_u8, vaesmcq_u8,
vaesimcq_u8): New.
* config/aarch64/iterators.md (UNSPEC_AESE, UNSPEC_AESD, UNSPEC_AESMC,
UNSPEC_AESIMC): New.
(CRYPTO_AES, CRYPTO_AESMC): New int iterators.
(aes_op, aesmc_op): New int attributes.
testsuite/
* gcc.target/aarch64/aes_1.c: New.
From-SVN: r206117
2013-12-19 15:51:28 +01:00
|
|
|
"aes<aesmc_op>\\t%0.16b, %1.16b"
|
2016-05-27 15:44:57 +02:00
|
|
|
[(set_attr "type" "crypto_aesmc")
|
|
|
|
(set_attr_alternative "enabled"
|
|
|
|
[(if_then_else (match_test
|
|
|
|
"aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
|
|
|
|
(const_string "yes" )
|
|
|
|
(const_string "no"))
|
|
|
|
(const_string "yes")])]
|
Implement support for AArch64 Crypto AES.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_binopu_qualifiers,
TYPES_BINOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi,
aarch64_crypto_aes<aesmc_op>v16qi): New.
* config/aarch64/arm_neon.h (vaeseq_u8, vaesdq_u8, vaesmcq_u8,
vaesimcq_u8): New.
* config/aarch64/iterators.md (UNSPEC_AESE, UNSPEC_AESD, UNSPEC_AESMC,
UNSPEC_AESIMC): New.
(CRYPTO_AES, CRYPTO_AESMC): New int iterators.
(aes_op, aesmc_op): New int attributes.
testsuite/
* gcc.target/aarch64/aes_1.c: New.
From-SVN: r206117
2013-12-19 15:51:28 +01:00
|
|
|
)
|
|
|
|
|
Implement support for AArch64 Crypto SHA1.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_ternopu_qualifiers,
TYPES_TERNOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_sha1hsi,
aarch64_crypto_sha1su1v4si, aarch64_crypto_sha1<sha1_op>v4si,
aarch64_crypto_sha1su0v4si): New.
* config/aarch64/arm_neon.h (vsha1cq_u32, sha1mq_u32, vsha1pq_u32,
vsha1h_u32, vsha1su0q_u32, vsha1su1q_u32): New.
* config/aarch64/iterators.md (UNSPEC_SHA1<CPMH>, UNSPEC_SHA1SU<01>):
New.
(CRYPTO_SHA1): New int iterator.
(sha1_op): New int attribute.
testsuite/
* gcc.target/aarch64/sha1_1.c: New.
From-SVN: r206118
2013-12-19 15:55:47 +01:00
|
|
|
;; sha1
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_sha1hsi"
|
|
|
|
[(set (match_operand:SI 0 "register_operand" "=w")
|
|
|
|
(unspec:SI [(match_operand:SI 1
|
|
|
|
"register_operand" "w")]
|
|
|
|
UNSPEC_SHA1H))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_SHA2"
|
Implement support for AArch64 Crypto SHA1.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_ternopu_qualifiers,
TYPES_TERNOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_sha1hsi,
aarch64_crypto_sha1su1v4si, aarch64_crypto_sha1<sha1_op>v4si,
aarch64_crypto_sha1su0v4si): New.
* config/aarch64/arm_neon.h (vsha1cq_u32, sha1mq_u32, vsha1pq_u32,
vsha1h_u32, vsha1su0q_u32, vsha1su1q_u32): New.
* config/aarch64/iterators.md (UNSPEC_SHA1<CPMH>, UNSPEC_SHA1SU<01>):
New.
(CRYPTO_SHA1): New int iterator.
(sha1_op): New int attribute.
testsuite/
* gcc.target/aarch64/sha1_1.c: New.
From-SVN: r206118
2013-12-19 15:55:47 +01:00
|
|
|
"sha1h\\t%s0, %s1"
|
|
|
|
[(set_attr "type" "crypto_sha1_fast")]
|
|
|
|
)
|
|
|
|
|
2016-11-02 15:00:22 +01:00
|
|
|
(define_insn "aarch64_crypto_sha1hv4si"
|
|
|
|
[(set (match_operand:SI 0 "register_operand" "=w")
|
|
|
|
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
|
|
|
|
(parallel [(const_int 0)]))]
|
|
|
|
UNSPEC_SHA1H))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
|
2016-11-02 15:00:22 +01:00
|
|
|
"sha1h\\t%s0, %s1"
|
|
|
|
[(set_attr "type" "crypto_sha1_fast")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_be_crypto_sha1hv4si"
|
|
|
|
[(set (match_operand:SI 0 "register_operand" "=w")
|
|
|
|
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
|
|
|
|
(parallel [(const_int 3)]))]
|
|
|
|
UNSPEC_SHA1H))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
|
2016-11-02 15:00:22 +01:00
|
|
|
"sha1h\\t%s0, %s1"
|
|
|
|
[(set_attr "type" "crypto_sha1_fast")]
|
|
|
|
)
|
|
|
|
|
Implement support for AArch64 Crypto SHA1.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_ternopu_qualifiers,
TYPES_TERNOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_sha1hsi,
aarch64_crypto_sha1su1v4si, aarch64_crypto_sha1<sha1_op>v4si,
aarch64_crypto_sha1su0v4si): New.
* config/aarch64/arm_neon.h (vsha1cq_u32, sha1mq_u32, vsha1pq_u32,
vsha1h_u32, vsha1su0q_u32, vsha1su1q_u32): New.
* config/aarch64/iterators.md (UNSPEC_SHA1<CPMH>, UNSPEC_SHA1SU<01>):
New.
(CRYPTO_SHA1): New int iterator.
(sha1_op): New int attribute.
testsuite/
* gcc.target/aarch64/sha1_1.c: New.
From-SVN: r206118
2013-12-19 15:55:47 +01:00
|
|
|
(define_insn "aarch64_crypto_sha1su1v4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")]
|
|
|
|
UNSPEC_SHA1SU1))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_SHA2"
|
Implement support for AArch64 Crypto SHA1.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_ternopu_qualifiers,
TYPES_TERNOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_sha1hsi,
aarch64_crypto_sha1su1v4si, aarch64_crypto_sha1<sha1_op>v4si,
aarch64_crypto_sha1su0v4si): New.
* config/aarch64/arm_neon.h (vsha1cq_u32, sha1mq_u32, vsha1pq_u32,
vsha1h_u32, vsha1su0q_u32, vsha1su1q_u32): New.
* config/aarch64/iterators.md (UNSPEC_SHA1<CPMH>, UNSPEC_SHA1SU<01>):
New.
(CRYPTO_SHA1): New int iterator.
(sha1_op): New int attribute.
testsuite/
* gcc.target/aarch64/sha1_1.c: New.
From-SVN: r206118
2013-12-19 15:55:47 +01:00
|
|
|
"sha1su1\\t%0.4s, %2.4s"
|
|
|
|
[(set_attr "type" "crypto_sha1_fast")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
|
|
|
(match_operand:SI 2 "register_operand" "w")
|
|
|
|
(match_operand:V4SI 3 "register_operand" "w")]
|
|
|
|
CRYPTO_SHA1))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_SHA2"
|
Implement support for AArch64 Crypto SHA1.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_ternopu_qualifiers,
TYPES_TERNOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_sha1hsi,
aarch64_crypto_sha1su1v4si, aarch64_crypto_sha1<sha1_op>v4si,
aarch64_crypto_sha1su0v4si): New.
* config/aarch64/arm_neon.h (vsha1cq_u32, sha1mq_u32, vsha1pq_u32,
vsha1h_u32, vsha1su0q_u32, vsha1su1q_u32): New.
* config/aarch64/iterators.md (UNSPEC_SHA1<CPMH>, UNSPEC_SHA1SU<01>):
New.
(CRYPTO_SHA1): New int iterator.
(sha1_op): New int attribute.
testsuite/
* gcc.target/aarch64/sha1_1.c: New.
From-SVN: r206118
2013-12-19 15:55:47 +01:00
|
|
|
"sha1<sha1_op>\\t%q0, %s2, %3.4s"
|
|
|
|
[(set_attr "type" "crypto_sha1_slow")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_sha1su0v4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")
|
|
|
|
(match_operand:V4SI 3 "register_operand" "w")]
|
|
|
|
UNSPEC_SHA1SU0))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_SHA2"
|
Implement support for AArch64 Crypto SHA1.
gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-builtins.c (aarch64_types_ternopu_qualifiers,
TYPES_TERNOPU): New.
* config/aarch64/aarch64-simd.md (aarch64_crypto_sha1hsi,
aarch64_crypto_sha1su1v4si, aarch64_crypto_sha1<sha1_op>v4si,
aarch64_crypto_sha1su0v4si): New.
* config/aarch64/arm_neon.h (vsha1cq_u32, sha1mq_u32, vsha1pq_u32,
vsha1h_u32, vsha1su0q_u32, vsha1su1q_u32): New.
* config/aarch64/iterators.md (UNSPEC_SHA1<CPMH>, UNSPEC_SHA1SU<01>):
New.
(CRYPTO_SHA1): New int iterator.
(sha1_op): New int attribute.
testsuite/
* gcc.target/aarch64/sha1_1.c: New.
From-SVN: r206118
2013-12-19 15:55:47 +01:00
|
|
|
"sha1su0\\t%0.4s, %2.4s, %3.4s"
|
|
|
|
[(set_attr "type" "crypto_sha1_xor")]
|
|
|
|
)
|
2013-12-19 16:00:53 +01:00
|
|
|
|
|
|
|
;; sha256
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")
|
|
|
|
(match_operand:V4SI 3 "register_operand" "w")]
|
|
|
|
CRYPTO_SHA256))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_SHA2"
|
2013-12-19 16:00:53 +01:00
|
|
|
"sha256h<sha256_op>\\t%q0, %q2, %3.4s"
|
|
|
|
[(set_attr "type" "crypto_sha256_slow")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_sha256su0v4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")]
|
|
|
|
UNSPEC_SHA256SU0))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_SHA2"
|
2013-12-19 16:00:53 +01:00
|
|
|
"sha256su0\\t%0.4s, %2.4s"
|
|
|
|
[(set_attr "type" "crypto_sha256_fast")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_sha256su1v4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")
|
|
|
|
(match_operand:V4SI 3 "register_operand" "w")]
|
|
|
|
UNSPEC_SHA256SU1))]
|
2018-01-11 07:04:17 +01:00
|
|
|
"TARGET_SIMD && TARGET_SHA2"
|
2013-12-19 16:00:53 +01:00
|
|
|
"sha256su1\\t%0.4s, %2.4s, %3.4s"
|
|
|
|
[(set_attr "type" "crypto_sha256_slow")]
|
|
|
|
)
|
2013-12-19 16:04:19 +01:00
|
|
|
|
2018-01-11 07:04:17 +01:00
|
|
|
;; sha512
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
|
|
|
|
[(set (match_operand:V2DI 0 "register_operand" "=w")
|
|
|
|
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
|
|
|
|
(match_operand:V2DI 2 "register_operand" "w")
|
|
|
|
(match_operand:V2DI 3 "register_operand" "w")]
|
|
|
|
CRYPTO_SHA512))]
|
|
|
|
"TARGET_SIMD && TARGET_SHA3"
|
|
|
|
"sha512h<sha512_op>\\t%q0, %q2, %3.2d"
|
|
|
|
[(set_attr "type" "crypto_sha512")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_sha512su0qv2di"
|
|
|
|
[(set (match_operand:V2DI 0 "register_operand" "=w")
|
|
|
|
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
|
|
|
|
(match_operand:V2DI 2 "register_operand" "w")]
|
|
|
|
UNSPEC_SHA512SU0))]
|
|
|
|
"TARGET_SIMD && TARGET_SHA3"
|
|
|
|
"sha512su0\\t%0.2d, %2.2d"
|
|
|
|
[(set_attr "type" "crypto_sha512")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_crypto_sha512su1qv2di"
|
|
|
|
[(set (match_operand:V2DI 0 "register_operand" "=w")
|
|
|
|
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
|
|
|
|
(match_operand:V2DI 2 "register_operand" "w")
|
|
|
|
(match_operand:V2DI 3 "register_operand" "w")]
|
|
|
|
UNSPEC_SHA512SU1))]
|
|
|
|
"TARGET_SIMD && TARGET_SHA3"
|
|
|
|
"sha512su1\\t%0.2d, %2.2d, %3.2d"
|
|
|
|
[(set_attr "type" "crypto_sha512")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; sha3
|
|
|
|
|
|
|
|
(define_insn "aarch64_eor3qv8hi"
|
|
|
|
[(set (match_operand:V8HI 0 "register_operand" "=w")
|
|
|
|
(xor:V8HI
|
|
|
|
(xor:V8HI
|
|
|
|
(match_operand:V8HI 2 "register_operand" "%w")
|
|
|
|
(match_operand:V8HI 3 "register_operand" "w"))
|
|
|
|
(match_operand:V8HI 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD && TARGET_SHA3"
|
|
|
|
"eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
|
|
|
|
[(set_attr "type" "crypto_sha3")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_rax1qv2di"
|
|
|
|
[(set (match_operand:V2DI 0 "register_operand" "=w")
|
|
|
|
(xor:V2DI
|
|
|
|
(rotate:V2DI
|
|
|
|
(match_operand:V2DI 2 "register_operand" "w")
|
|
|
|
(const_int 1))
|
|
|
|
(match_operand:V2DI 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD && TARGET_SHA3"
|
|
|
|
"rax1\\t%0.2d, %1.2d, %2.2d"
|
|
|
|
[(set_attr "type" "crypto_sha3")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_xarqv2di"
|
|
|
|
[(set (match_operand:V2DI 0 "register_operand" "=w")
|
|
|
|
(rotatert:V2DI
|
|
|
|
(xor:V2DI
|
|
|
|
(match_operand:V2DI 1 "register_operand" "%w")
|
|
|
|
(match_operand:V2DI 2 "register_operand" "w"))
|
|
|
|
(match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
|
|
|
|
"TARGET_SIMD && TARGET_SHA3"
|
|
|
|
"xar\\t%0.2d, %1.2d, %2.2d, %3"
|
|
|
|
[(set_attr "type" "crypto_sha3")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_bcaxqv8hi"
|
|
|
|
[(set (match_operand:V8HI 0 "register_operand" "=w")
|
|
|
|
(xor:V8HI
|
|
|
|
(and:V8HI
|
|
|
|
(not:V8HI (match_operand:V8HI 3 "register_operand" "w"))
|
|
|
|
(match_operand:V8HI 2 "register_operand" "w"))
|
|
|
|
(match_operand:V8HI 1 "register_operand" "w")))]
|
|
|
|
"TARGET_SIMD && TARGET_SHA3"
|
|
|
|
"bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
|
|
|
|
[(set_attr "type" "crypto_sha3")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; SM3
|
|
|
|
|
|
|
|
(define_insn "aarch64_sm3ss1qv4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")
|
|
|
|
(match_operand:V4SI 3 "register_operand" "w")]
|
|
|
|
UNSPEC_SM3SS1))]
|
|
|
|
"TARGET_SIMD && TARGET_SM4"
|
|
|
|
"sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
|
|
|
|
[(set_attr "type" "crypto_sm3")]
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")
|
|
|
|
(match_operand:V4SI 3 "register_operand" "w")
|
|
|
|
(match_operand:SI 4 "aarch64_imm2" "Ui2")]
|
|
|
|
CRYPTO_SM3TT))]
|
|
|
|
"TARGET_SIMD && TARGET_SM4"
|
|
|
|
"sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
|
|
|
|
[(set_attr "type" "crypto_sm3")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")
|
|
|
|
(match_operand:V4SI 3 "register_operand" "w")]
|
|
|
|
CRYPTO_SM3PART))]
|
|
|
|
"TARGET_SIMD && TARGET_SM4"
|
|
|
|
"sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
|
|
|
|
[(set_attr "type" "crypto_sm3")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; SM4
|
|
|
|
|
|
|
|
(define_insn "aarch64_sm4eqv4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")]
|
|
|
|
UNSPEC_SM4E))]
|
|
|
|
"TARGET_SIMD && TARGET_SM4"
|
|
|
|
"sm4e\\t%0.4s, %2.4s"
|
|
|
|
[(set_attr "type" "crypto_sm4")]
|
|
|
|
)
|
|
|
|
|
|
|
|
(define_insn "aarch64_sm4ekeyqv4si"
|
|
|
|
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
|
|
|
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
|
|
|
|
(match_operand:V4SI 2 "register_operand" "w")]
|
|
|
|
UNSPEC_SM4EKEY))]
|
|
|
|
"TARGET_SIMD && TARGET_SM4"
|
|
|
|
"sm4ekey\\t%0.4s, %1.4s, %2.4s"
|
|
|
|
[(set_attr "type" "crypto_sm4")]
|
|
|
|
)
|
|
|
|
|
|
|
|
;; fp16fml
|
|
|
|
|
|
|
|
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
|
|
|
|
[(set (match_operand:VDQSF 0 "register_operand" "=w")
|
|
|
|
(unspec:VDQSF
|
|
|
|
[(match_operand:VDQSF 1 "register_operand" "0")
|
|
|
|
(match_operand:<VFMLA_W> 2 "register_operand" "w")
|
|
|
|
(match_operand:<VFMLA_W> 3 "register_operand" "w")]
|
|
|
|
VFMLA16_LOW))]
|
|
|
|
"TARGET_F16FML"
|
|
|
|
{
|
|
|
|
int nunits = GET_MODE_NUNITS (<VFMLA_W>mode);
|
|
|
|
rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, nunits, false);
|
|
|
|
rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, nunits, false);
|
|
|
|
|
|
|
|
emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
|
|
|
|
operands[1],
|
|
|
|
operands[2],
|
|
|
|
operands[3],
|
|
|
|
p1, p2));
|
|
|
|
DONE;
|
|
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
;; Expander for the fmlal2/fmlsl2 (vector, high-half) builtins.  Identical
;; to the _low expander except the half selectors pick the high half
;; (third argument true).
(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "0")
	  (match_operand:<VFMLA_W> 2 "register_operand" "w")
	  (match_operand:<VFMLA_W> 3 "register_operand" "w")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  int nunits = GET_MODE_NUNITS (<VFMLA_W>mode);
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, nunits, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, nunits, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
								 operands[1],
								 operands[2],
								 operands[3],
								 p1, p2));
  DONE;
})
|
|
|
|
|
|
|
|
;; FMLAL (vector, low half): widen the low halves of the HFmode operands 2
;; and 3 to SFmode, multiply, and accumulate into operand 1 (tied to the
;; destination).  Operands 4 and 5 are the lo-half PARALLELs built by the
;; expander above.
(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL (vector, low half): as FMLAL low, but the first multiplicand is
;; negated before widening, giving a fused multiply-subtract.
(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLAL2 (vector, high half): as FMLAL low, but operands 4 and 5 select
;; the high half of each input (vect_par_cnst_hi_half).
(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL2 (vector, high half): negated first multiplicand, high-half
;; selection on both inputs.
(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; Expander for the fmlal/fmlsl (by element, low half, 64-bit) builtins.
;; Builds the lo-half PARALLEL for operand 2 and converts the lane number
;; (operand 4, 0-3 via aarch64_imm2) to the endian-adjusted lane rtx before
;; emitting the matching define_insn.
;; Consistency fix: close with "})" like every sibling expander, instead of
;; "}" and ")" on separate lines.
(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
					    GET_MODE_NUNITS (V4HFmode),
					    false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
							  operands[1],
							  operands[2],
							  operands[3],
							  p1, lane));
  DONE;
})
|
|
|
|
|
|
|
|
;; Expander for the fmlal2/fmlsl2 (by element, high half, 64-bit) builtins.
;; Same as the _low expander but the half selector picks the high half.
(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
					    GET_MODE_NUNITS (V4HFmode),
					    true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})
|
|
|
|
|
|
|
|
;; FMLAL (by element, low half, 64-bit): low half of operand 2 times the
;; broadcast of lane 5 of operand 3, accumulated into operand 1.  The lane
;; operand uses the "x" constraint (V0-V15) as required by the by-element
;; instruction encoding.
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL (by element, low half, 64-bit): as the fmlal variant above, with
;; the vector multiplicand negated before widening.
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLAL2 (by element, high half, 64-bit): high half of operand 2 times
;; the broadcast lane of operand 3.
(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL2 (by element, high half, 64-bit): negated high half of operand 2
;; times the broadcast lane of operand 3.
(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; Expander for the fmlalq/fmlslq (by element from a 128-bit vector,
;; low half, 128-bit) builtins.  Lane operand 4 is 0-7 (aarch64_lane_imm3)
;; since it indexes a V8HF vector.
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
					    GET_MODE_NUNITS (V8HFmode),
					    false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
|
|
|
|
|
|
|
|
;; Expander for the fmlal2q/fmlsl2q (by element from a 128-bit vector,
;; high half, 128-bit) builtins; high-half selector, lane 0-7.
(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
					    GET_MODE_NUNITS (V8HFmode),
					    true);

  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
  DONE;
})
|
|
|
|
|
|
|
|
;; FMLAL (by element, low half, 128-bit): low V4HF half of operand 2 times
;; the broadcast of lane 5 (0-7, Ui7) of the V8HF operand 3.
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL (by element, low half, 128-bit): as the fmlalq variant above with
;; the vector multiplicand negated before widening.
(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLAL2 (by element, high half, 128-bit): high V4HF half of operand 2
;; times the broadcast lane of the V8HF operand 3.
(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL2 (by element, high half, 128-bit): negated high half times the
;; broadcast lane of the V8HF operand 3.
(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; Expander for the fmlal/fmlsl (by element from a 128-bit vector, low
;; half, 64-bit result) builtins.  Note the mixed modes: the half
;; selector is built for the V4HF vector operand 2, while the lane rtx
;; indexes the V8HF operand 3 (hence V8HFmode and lane 0-7).
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
					    GET_MODE_NUNITS (V4HFmode),
					    false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;

})
|
|
|
|
|
|
|
|
;; Expander for the fmlal2/fmlsl2 (by element from a 128-bit vector, high
;; half, 64-bit result) builtins.  As the _low variant: the half selector
;; is for the V4HF operand 2, the lane rtx indexes the V8HF operand 3.
;; Style fix: "GET_MODE_NUNITS (V4HFmode)" — add the space before the
;; parenthesis required by GNU style and used by every sibling expander.
(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
					    GET_MODE_NUNITS (V4HFmode),
					    true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;

})
|
|
|
|
|
|
|
|
;; FMLAL (by element from 128-bit vector, low half, 64-bit): low V2HF half
;; of the V4HF operand 2 times the broadcast of lane 5 (0-7, Ui7) of the
;; V8HF operand 3.
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL (by element from 128-bit vector, low half, 64-bit): negated low
;; half of operand 2 times the broadcast lane of the V8HF operand 3.
(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLAL2 (by element from 128-bit vector, high half, 64-bit): high V2HF
;; half of operand 2 times the broadcast lane of the V8HF operand 3.
(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL2 (by element from 128-bit vector, high half, 64-bit): negated
;; high half of operand 2 times the broadcast lane of the V8HF operand 3.
(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; Expander for the fmlalq/fmlslq (by element from a 64-bit vector, low
;; half, 128-bit result) builtins.  Mixed modes: half selector for the
;; V8HF operand 2, lane rtx for the V4HF operand 3 (lane 0-3).
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
					    GET_MODE_NUNITS (V8HFmode),
					    false);

  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})
|
|
|
|
|
|
|
|
;; Expander for the fmlal2q/fmlsl2q (by element from a 64-bit vector,
;; high half, 128-bit result) builtins; high-half selector, lane 0-3.
(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
					    GET_MODE_NUNITS (V8HFmode),
					    true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
|
|
|
|
|
|
|
|
;; FMLAL (by element from 64-bit vector, low half, 128-bit): low V4HF half
;; of the V8HF operand 2 times the broadcast of lane 5 (0-3, Ui2) of the
;; V4HF operand 3.
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL (by element from 64-bit vector, low half, 128-bit): negated low
;; half of operand 2 times the broadcast lane of the V4HF operand 3.
(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLAL2 (by element from 64-bit vector, high half, 128-bit): high V4HF
;; half of operand 2 times the broadcast lane of the V4HF operand 3.
(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
|
|
|
;; FMLSL2 (by element from 64-bit vector, high half, 128-bit): negated
;; high half of operand 2 times the broadcast lane of the V4HF operand 3.
(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
|
|
|
|
|
2013-12-19 16:04:19 +01:00
|
|
|
;; pmull
|
|
|
|
|
|
|
|
;; Polynomial multiply long: 64x64 -> 128-bit carry-less multiply of two
;; DImode values, producing a TImode result.  Gated on the AES crypto
;; extension, which provides the 1Q form of PMULL.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI  [(match_operand:DI 1 "register_operand" "w")
		     (match_operand:DI 2 "register_operand" "w")]
	 UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)
|
|
|
|
|
|
|
|
;; PMULL2: carry-less multiply of the upper DImode elements of two V2DI
;; vectors, producing a 128-bit TImode result.
(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
	 UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
|